def main():
    """Build wavelet-smoothed train/test arrays for every coin and save them.

    For each coin the raw CSV under ``data/raw`` is read, passed through
    ``DWTSmoothPreprocessor.fit_transform`` and split without shuffling
    (5% held out as the test portion). The four resulting arrays are written
    with ``np.save`` into ``data/processed``.
    """
    print('Making features from raw data...')

    raw_dir = join(get_project_path(), 'data', 'raw')
    processed_dir = join(get_project_path(), 'data', 'processed')
    makedirs(processed_dir, exist_ok=True)

    target_col = 'close'
    lookback = 72
    horizon = 1
    holdout_fraction = 0.05
    wavelet_name = 'haar'

    for symbol in ('BTC', 'ETH'):
        csv_path = join(raw_dir, symbol + '.csv')
        print('Featurizing raw {} data from {}...'.format(symbol, csv_path))
        frame = pd.read_csv(csv_path, index_col=0)

        smoother = DWTSmoothPreprocessor(
            production=False,
            target_col=target_col,
            Tx=lookback,
            Ty=horizon,
            wavelet=wavelet_name,
        )
        features, targets = smoother.fit_transform(frame)

        # train_test_split yields (X_train, X_test, y_train, y_test), which is
        # exactly the order of the filename templates below.
        splits = train_test_split(
            features, targets, test_size=holdout_fraction, shuffle=False)
        templates = (
            'X_train_{}_{}_smooth_{}',
            'X_test_{}_{}_smooth_{}',
            'y_train_{}_{}_smooth_{}',
            'y_test_{}_{}_smooth_{}',
        )
        for template, array in zip(templates, splits):
            filename = template.format(symbol, wavelet_name, lookback)
            np.save(file=join(processed_dir, filename), arr=array)
def setUp(self):
    """Prepare the end-to-end ETH fixture: data, expected shapes and metrics.

    Seeds NumPy's RNG, fetches the training and prediction frames through
    ``retrieve_all_data`` and stores the reference values the tests compare
    against.
    """
    np.random.seed(31337)

    # Locations.
    self.project_path = get_project_path()
    self.data_dir = join(self.project_path, 'crypr', 'tests', 'data')

    # Data and feature configuration.
    self.SYM = 'ETH'
    self.LAST_N_HOURS = 14000
    self.FEATURE_WINDOW = 72
    self.MOVING_AVERAGE_LAGS = [6, 12, 24, 48, 72]
    self.TARGET = 'close'
    self.Tx = 72
    self.Ty = 1
    self.TEST_SIZE = 0.05
    self.end_time = utc_timestamp_ymd(2018, 6, 27)

    # Both frames end at the same timestamp and are quoted in USD; only the
    # number of hours requested differs.
    shared_query = dict(
        coin=self.SYM, comparison_symbol='USD', end_time=self.end_time)
    self.data = retrieve_all_data(
        num_hours=self.LAST_N_HOURS, **shared_query)
    self.predict_data = retrieve_all_data(
        num_hours=self.Tx + self.FEATURE_WINDOW - 1, **shared_query)

    # Expected shapes: the train and test row counts add up to the full set.
    train_rows, test_rows, feature_cols = 13159, 693, 1224
    total_rows = train_rows + test_rows
    self.X_shape = (total_rows, feature_cols)
    self.y_shape = (total_rows, 1)
    self.X_train_shape = (train_rows, feature_cols)
    self.X_test_shape = (test_rows, feature_cols)
    self.y_train_shape = (train_rows, 1)
    self.y_test_shape = (test_rows, 1)

    # Expected sample values.
    self.X_sample = 709.48
    self.y_sample = -1.498064809896027
    self.X_train_sample = 11.41
    self.y_train_sample = 0.0
    self.X_test_sample = 487.58
    self.y_test_sample = 0.9448599618077758

    # Model hyper-parameters and the reference results they should yield.
    self.parameters = {
        'objective': 'reg:linear',
        'learning_rate': 0.07,
        'max_depth': 10,
        'min_child_weight': 4,
        'silent': 1,
        'subsample': 0.7,
        'colsample_bytree': 0.7,
        'n_estimators': 20,
    }
    self.train_mae = 0.8953377462440475
    self.train_rmse = 1.4144230033451395
    self.prediction = 1.2296733856201172
def setUp(self):
    """Seed the RNG and load the raw BTC test CSV used by the tests."""
    np.random.seed(31337)

    fixture_parts = ('crypr', 'tests', 'data', 'test_raw_btc.csv')
    self.test_data_path = join(get_project_path(), *fixture_parts)
    self.data = pd.read_csv(self.test_data_path, index_col=0)

    # Parameters shared by the test cases.
    self.test_cols = ['high', 'low', 'close']
    self.moving_average_lag = 4
    self.decimal_tolerance = 10
def setUp(self):
    """Load the raw BTC test CSV and set the preprocessing parameters."""
    self.project_path = get_project_path()
    self.data_dir = join(self.project_path, 'crypr', 'tests', 'data')
    fixture_csv = join(self.data_dir, 'test_raw_btc.csv')
    self.data = pd.read_csv(fixture_csv, index_col=0)

    # Preprocessing configuration.
    self.Tx, self.Ty = 72, 1
    self.target_col = 'close'
    self.wavelet = 'haar'
    self.moving_averages = [6, 12, 24, 48, 72]

    # 60 consecutive integers laid out as 5 rows of 12 columns.
    self.dummy_arr_2d = np.arange(5 * 4 * 3).reshape((5, 12))
def setUp(self):
    """Create a 0..9 series and its supervised framings for one and two outputs."""
    self.data_dir = join(get_project_path(), 'crypr', 'tests', 'data')
    self.test_series = pd.Series([*range(10)])
    self.tx = 4
    self.ty_single = 1
    self.ty_multiple = 2

    def frame_with_outputs(n_out):
        # Same series and input window each time; only the output width varies.
        return series_to_supervised(
            self.test_series, n_in=self.tx, n_out=n_out, dropnan=True)

    self.to_supervised_ty_single = frame_with_outputs(self.ty_single)
    self.to_supervised_ty_multiple = frame_with_outputs(self.ty_multiple)
def main():
    """Build multi-channel train/test arrays for every coin and save them.

    For each coin the raw CSV under ``data/raw`` is read, expanded with
    ``make_features``, framed with ``data_to_supervised``, reshaped by
    ``make_3d`` and split without shuffling (5% held out as the test
    portion). The four arrays are written with ``np.save`` into
    ``data/processed``.
    """
    print('Making features from raw data...')

    source_dir = join(get_project_path(), 'data', 'raw')
    target_dir = join(get_project_path(), 'data', 'processed')
    makedirs(target_dir, exist_ok=True)

    target_col = 'close'
    lookback = 72
    horizon = 1
    holdout_fraction = 0.05

    for symbol in ('BTC', 'ETH'):
        csv_path = join(source_dir, symbol + '.csv')
        print('Featurizing raw {} data from {}...'.format(symbol, csv_path))
        frame = pd.read_csv(csv_path, index_col=0)

        engineered = make_features(
            frame,
            target_col=target_col,
            keep_cols=['close', 'high', 'low', 'volumeto', 'volumefrom'],
            ma_lags=[6, 12, 24, 48],
            ma_cols=['close', 'volumefrom', 'volumeto'],
        )
        flat_features, targets = data_to_supervised(
            engineered, target_ix=-1, Tx=lookback, Ty=horizon)

        # The flat matrix has `lookback` columns per feature channel.
        channel_count = int(flat_features.shape[1] / lookback)
        stacked = make_3d(
            flat_features, tx=lookback, num_channels=channel_count)

        # train_test_split yields (X_train, X_test, y_train, y_test), which is
        # exactly the order of the filename templates below.
        splits = train_test_split(
            stacked, targets, test_size=holdout_fraction, shuffle=False)
        templates = ('X_train_{}', 'X_test_{}', 'y_train_{}', 'y_test_{}')
        for template, array in zip(templates, splits):
            np.save(file=join(target_dir, template.format(symbol)), arr=array)
def main(hours):
    """Fetch `hours` hours of USD-quoted data per coin and write it to CSV.

    One file per coin (``BTC.csv``, ``ETH.csv``) is written into the
    project's ``data/raw`` directory, which is created if missing.
    """
    print('Downloading data from Cryptocompare ...')

    raw_dir = join(get_project_path(), 'data', 'raw')
    makedirs(raw_dir, exist_ok=True)

    for symbol in ('BTC', 'ETH'):
        print('Retrieving {} coin data from API...'.format(symbol))
        frame = retrieve_all_data(
            coin=symbol, num_hours=hours, comparison_symbol='USD')
        frame.to_csv(join(raw_dir, symbol + '.csv'))
from keras.optimizers import Adam
from keras.callbacks import TensorBoard
from crypr.zoo import LSTM_triggerNG, LSTM_WSAEs, build_ae_lstm
from crypr.util import get_project_path


# In[2]:


""" Import Data. """
# Parameters identifying which preprocessed arrays to load.
SYM = 'BTC'
Ty = 1
Tx = 72
num_channels = 1
wt_type = 'haar'
data_dir = os.path.join(get_project_path(), 'data', 'processed')

# Load the four arrays in one pass; the templates are listed in the same
# order as the names on the left-hand side.
X_train, X_test, y_train, y_test = (
    np.load(os.path.join(data_dir, template.format(SYM, wt_type, Tx)))
    for template in (
        'X_train_{}_{}_smooth_{}.npy',
        'X_test_{}_{}_smooth_{}.npy',
        'y_train_{}_{}_smooth_{}.npy',
        'y_test_{}_{}_smooth_{}.npy',
    )
)


# In[3]:
from scipy import signal import pywt from crypr.util import get_project_path from crypr.build import make_features, data_to_supervised, make_3d from crypr.transformers import PassthroughTransformer, HaarSmoothTransformer # In[2]: SYM = 'BTC' TARGET = 'close' Tx = 72 Ty = 1 TEST_SIZE = 0.05 data_path = join(get_project_path(), 'data', 'raw', SYM + '.csv') data = pd.read_csv(data_path, index_col=0) """ Train Test Split. """ data_train, data_test = train_test_split(data, test_size=TEST_SIZE, shuffle=False) data_train = data_train.dropna() data_test = data_test.dropna().iloc[:-1] p(data_train.shape, data_test.shape) data_test.head() # In[3]: """
from crypr.util import get_project_path
import statsmodels
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
import statsmodels.api as sm
import scipy.stats as scs
import arch


# In[98]:


""" Import Data. """
# Read the raw CSV for one coin from <project>/data/raw.
SYM = 'BTC'
data_path = os.path.join(get_project_path(), 'data', 'raw', SYM + '.csv')
# NOTE(review): index_col=-1 uses the LAST column as the index — presumably a
# timestamp column; confirm against the CSV layout.
data = pd.read_csv(os.path.join(data_path), index_col=-1)
data.head()


# In[99]:


# Alternative kept for reference (note it refers to `df`, not `data`):
# # log returns
# lrets = np.log(df.close/df.close.shift(1)).dropna()
# lrets.plot()
# plt.show()

# percent change
# Fractional change of the close column from one row to the next, then plotted.
pchange = data['close'].pct_change()
pchange.plot()
plt.show()
get_ipython().run_line_magic('matplotlib', 'inline')
from xgboost import XGBRegressor
from xgboost import plot_importance
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error
from crypr.util import get_project_path


# In[2]:


# Parameters identifying which preprocessed arrays to use.
SYM = 'BTC'
Ty = 1
Tx = 72
MAX_LAG = 72
data_dir = join(get_project_path(), 'data', 'processed')


# In[3]:


""" Import Data. """
def load_preprocessed_data(from_dir, sym):
    """Load the saved train/test arrays for one coin.

    Args:
        from_dir: directory containing the ``.npy`` files.
        sym: coin symbol embedded in the filenames (e.g. ``'BTC'``).

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` of loaded arrays.
    """
    # Use the arguments rather than the notebook globals `data_dir` / `SYM`,
    # so the function actually loads what the caller asks for.
    def _load(template):
        return np.load(join(from_dir, template.format(sym)))

    X_train = _load('X_train_{}.npy')
    y_train = _load('y_train_{}.npy')
    X_test = _load('X_test_{}.npy')
    y_test = _load('y_test_{}.npy')
    return X_train, X_test, y_train, y_test
import seaborn as sns
get_ipython().run_line_magic('matplotlib', 'inline')
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import StandardScaler
from keras.layers import Input, LSTM, BatchNormalization, Dense
from keras import Model
from keras.initializers import RandomNormal, Ones, Constant
from keras.optimizers import Adam
from crypr.util import get_project_path

coin = 'BTC'
data_path = join(get_project_path(), 'data', 'processed')


# In[2]:


""" Import Data. """
Ty = 1
Tx = 72
feature_lag = 72

X_train = np.load(join(data_path, 'X_train_{}.npy'.format(coin)))
# The feature-building script saves the targets as lowercase `y_train_<coin>`
# / `y_test_<coin>`; the capitalised names only resolve on case-insensitive
# filesystems. Variable names are kept as-is for the rest of the notebook.
Y_train = np.load(join(data_path, 'y_train_{}.npy'.format(coin)))
X_test = np.load(join(data_path, 'X_test_{}.npy'.format(coin)))
Y_test = np.load(join(data_path, 'y_test_{}.npy'.format(coin)))

# The size of the third axis of X_train is used as the feature count.
N_FEATURES = X_train.shape[2]
"""Run the API by calling this module""" import connexion from flask import abort from os.path import join import pandas as pd from crypr.models import SavedKerasTensorflowModel from crypr.build import make_features, series_to_predict_matrix, make_3d from crypr.cryptocompare import retrieve_all_data from crypr.util import get_project_path models_path = join(get_project_path(), 'models') model_type = 'lstm_ng' global btc_model, eth_model btc_model_filename = '{}_{}.h5'.format(model_type, 'BTC') btc_model = SavedKerasTensorflowModel(join(models_path, btc_model_filename)) eth_model_filename = '{}_{}.h5'.format(model_type, 'ETH') eth_model = SavedKerasTensorflowModel(join(models_path, eth_model_filename)) def description(): return {'message': 'The crypto-predict API'} def say_hello(name=None): return {'message': 'Hello, {}!'.format(name or '')} def predict(coin=None): coin = coin or 'BTC'
from keras.optimizers import Adam from keras.callbacks import TensorBoard from crypr.zoo import LSTM_triggerNG from crypr.util import get_project_path # In[2]: """ Import Data. """ SYM = 'BTC' Ty = 1 Tx = 72 num_channels = 8 data_dir = join(get_project_path(), 'data', 'processed') X_train = np.load(join(data_dir, 'X_train_multiple_smooth_{}.npy'.format(SYM))) X_test = np.load(join(data_dir, 'X_test_multiple_smooth_{}.npy'.format(SYM))) y_train = np.load(join(data_dir, 'y_train_multiple_smooth_{}.npy'.format(SYM))) y_test = np.load(join(data_dir, 'y_test_multiple_smooth_{}.npy'.format(SYM))) # In[3]: X_train.shape, X_test.shape, y_test.shape, y_train.shape # In[4]: """ Evaluate Dummy Models. """ for strategy in ['mean', 'median', 'constant']:
def main(epochs, verbose):
    """Train one model per coin on the preprocessed arrays and save it.

    For each coin the wavelet-smoothed train/test arrays are loaded from
    ``data/processed``, a model is built and compiled, fitted with a
    TensorBoard callback, and saved as ``.h5`` under ``models``.

    Args:
        epochs: number of epochs passed to ``model.fit``.
        verbose: verbosity value passed to ``model.fit``.

    Raises:
        ValueError: if the configured model type is not supported.
    """
    print('Creating and training models for API...')

    input_dir = join(get_project_path(), 'data', 'processed')
    output_dir = join(get_project_path(), 'models')
    makedirs(output_dir, exist_ok=True)

    # Data params
    coins = ['BTC', 'ETH']
    ty = 1
    tx = 72
    num_channels = 1
    wavelet = 'haar'

    # Model params
    batch_size = 32
    learning_rate = .001
    loss = 'mae'
    beta_1 = 0.9
    beta_2 = 0.999
    decay = 0.01
    model_type = 'ae_lstm'

    for coin in coins:
        print('Loading preprocessed {} data from {}'.format(coin, input_dir))
        # Templates are listed in the same order as the names being assigned.
        X_train, X_test, y_train, y_test = (
            np.load(join(input_dir, template.format(coin, wavelet, tx)))
            for template in (
                'X_train_{}_{}_smooth_{}.npy',
                'X_test_{}_{}_smooth_{}.npy',
                'y_train_{}_{}_smooth_{}.npy',
                'y_test_{}_{}_smooth_{}.npy',
            )
        )

        print('Building model {}...'.format(model_type))
        if model_type == 'ae_lstm':
            estimator = build_ae_lstm(
                num_inputs=X_train.shape[-1],
                num_channels=num_channels,
                num_outputs=ty,
            )
            model = RegressionModel(estimator)
        else:
            raise ValueError(
                'Model type {} is not supported. \nExiting.'.format(model_type))

        # Keras' summary() prints the table itself and returns None, so
        # wrapping it in print() only adds a stray "None" line.
        model.estimator.summary()

        tb_log_dir = join(output_dir, 'logs')
        tensorboard = TensorBoard(
            log_dir=tb_log_dir,
            histogram_freq=0,
            batch_size=batch_size,
            write_graph=True,
            write_grads=False,
            write_images=False,
        )

        opt = Adam(lr=learning_rate, beta_1=beta_1, beta_2=beta_2, decay=decay)
        model.estimator.compile(loss=loss, optimizer=opt)

        print('Training model for {} epochs ...'.format(epochs))
        print('Track model fit with `tensorboard --logdir {}`'.format(tb_log_dir))
        # Two targets are supplied: the input itself and y.
        model.fit(
            X_train, [X_train, y_train],
            shuffle=False,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_test, [X_test, y_test]),
            callbacks=[tensorboard],
            verbose=verbose
        )

        model_filename = '{}_smooth_{}x{}_{}_{}.h5'.format(
            model_type, num_channels, tx, wavelet, coin)
        output_path = join(output_dir, model_filename)
        print('Saving trained model to {}...'.format(output_path))
        model.estimator.save(output_path)
from os.path import join from datetime import datetime import numpy as np import pandas as pd import matplotlib.pyplot as plt get_ipython().run_line_magic('matplotlib', 'inline') from sklearn.compose import ColumnTransformer from sklearn.model_selection import train_test_split from crypr.transformers import MovingAverageTransformer, PercentChangeTransformer, PassthroughTransformer from crypr.build import data_to_supervised from crypr.util import get_project_path coin = 'BTC' data_path = join(get_project_path(), 'data', 'raw', coin + '.csv') # In[2]: data = pd.read_csv(data_path, index_col=0) p(data.shape) data.head() # In[3]: preprocessing_config = { 'passthrough': ['close', 'low', 'high'], 'moving_average': ['close', 'volumeto', 'volumefrom'], 'target': 'close', 'tx': 72, 'ty': 1,
from crypr.util import get_project_path, utc_timestamp_ymd

if __name__ == '__main__':
    # Fixed seed for NumPy's global RNG.
    np.random.seed(31337)

    # Data selection: which coin, how many hours, and the end timestamp.
    SYM = 'ETH'
    LAST_N_HOURS = 14000
    FEATURE_WINDOW = 72
    MOVING_AVERAGE_LAGS = [6, 12, 24, 48, 72]
    TO_TIME = utc_timestamp_ymd(2018, 6, 27)

    # Preprocessing parameters.
    TARGET = 'close'
    Tx = 72
    Ty = 1
    TEST_SIZE = 0.05

    # Directory holding the test-suite data files.
    project_path = get_project_path()
    test_data_dir = join(project_path, 'crypr', 'tests', 'data')

    # Fetch the USD-quoted data for SYM ending at TO_TIME.
    data = retrieve_all_data(coin=SYM, num_hours=LAST_N_HOURS, comparison_symbol='USD', end_time=TO_TIME)

    # Fit the preprocessor on the data and transform that same data.
    preprocessor = SimplePreprocessor(production=False, target_col=TARGET, Tx=Tx, Ty=Ty,
                                      moving_averages=MOVING_AVERAGE_LAGS)
    X, y = preprocessor.fit(data).transform(data)
    # Keep the feature-matrix shape for later use.
    old_shape = X.shape