Example #1
from os import makedirs
from os.path import join

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# NOTE: the crypr import paths below are assumed from how the other examples use these helpers.
from crypr.preprocessors import DWTSmoothPreprocessor
from crypr.util import get_project_path


def main():
    print('Making features from raw data...')

    data_dir = join(get_project_path(), 'data', 'raw')
    output_dir = join(get_project_path(), 'data', 'processed')
    makedirs(output_dir, exist_ok=True)

    coins = ['BTC', 'ETH']
    TARGET = 'close'
    Tx = 72
    Ty = 1
    TEST_SIZE = 0.05
    WAVELET = 'haar'

    for SYM in coins:
        raw_data_path = join(data_dir, SYM + '.csv')
        print('Featurizing raw {} data from {}...'.format(SYM, raw_data_path))

        raw_df = pd.read_csv(raw_data_path, index_col=0)

        preprocessor = DWTSmoothPreprocessor(production=False, target_col=TARGET, Tx=Tx, Ty=Ty, wavelet=WAVELET)
        X_smoothed, y = preprocessor.fit_transform(raw_df)

        X_train, X_test, y_train, y_test = train_test_split(X_smoothed, y, test_size=TEST_SIZE, shuffle=False)

        np.save(arr=X_train, file=join(output_dir, 'X_train_{}_{}_smooth_{}'.format(SYM, WAVELET, Tx)))
        np.save(arr=X_test, file=join(output_dir, 'X_test_{}_{}_smooth_{}'.format(SYM, WAVELET, Tx)))
        np.save(arr=y_train, file=join(output_dir, 'y_train_{}_{}_smooth_{}'.format(SYM, WAVELET, Tx)))
        np.save(arr=y_test, file=join(output_dir, 'y_test_{}_{}_smooth_{}'.format(SYM, WAVELET, Tx)))
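Example #2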
    def setUp(self):
        np.random.seed(31337)

        self.project_path = get_project_path()
        self.data_dir = join(self.project_path, 'crypr', 'tests', 'data')

        self.SYM = 'ETH'
        self.LAST_N_HOURS = 14000
        self.FEATURE_WINDOW = 72
        self.MOVING_AVERAGE_LAGS = [6, 12, 24, 48, 72]
        self.TARGET = 'close'
        self.Tx = 72
        self.Ty = 1
        self.TEST_SIZE = 0.05
        self.end_time = utc_timestamp_ymd(2018, 6, 27)

        self.data = retrieve_all_data(coin=self.SYM,
                                      num_hours=self.LAST_N_HOURS,
                                      comparison_symbol='USD',
                                      end_time=self.end_time)

        self.predict_data = retrieve_all_data(coin=self.SYM,
                                              num_hours=self.Tx +
                                              self.FEATURE_WINDOW - 1,
                                              comparison_symbol='USD',
                                              end_time=self.end_time)

        self.X_shape = (13852, 1224)
        self.y_shape = (13852, 1)

        self.X_sample = 709.48
        self.y_sample = -1.498064809896027

        self.X_train_shape = (13159, 1224)
        self.X_test_shape = (693, 1224)
        self.y_train_shape = (13159, 1)
        self.y_test_shape = (693, 1)

        self.X_train_sample = 11.41
        self.y_train_sample = 0.0

        self.X_test_sample = 487.58
        self.y_test_sample = 0.9448599618077758

        self.parameters = {
            'objective': 'reg:linear',
            'learning_rate': .07,
            'max_depth': 10,
            'min_child_weight': 4,
            'silent': 1,
            'subsample': 0.7,
            'colsample_bytree': 0.7,
            'n_estimators': 20,
        }

        self.train_mae = 0.8953377462440475
        self.train_rmse = 1.4144230033451395
        self.prediction = 1.2296733856201172
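Example #3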
    def setUp(self):
        np.random.seed(31337)
        self.decimal_tolerance = 10

        self.test_cols = ['high', 'low', 'close']
        self.moving_average_lag = 4

        self.test_data_path = join(get_project_path(), 'crypr', 'tests', 'data', 'test_raw_btc.csv')
        self.data = pd.read_csv(self.test_data_path, index_col=0)
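Example #4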
    def setUp(self):
        self.project_path = get_project_path()
        self.data_dir = join(self.project_path, 'crypr', 'tests', 'data')
        self.data = pd.read_csv(join(self.data_dir, 'test_raw_btc.csv'), index_col=0)
        self.Tx = 72
        self.Ty = 1
        self.target_col = 'close'
        self.wavelet = 'haar'
        self.moving_averages = [6, 12, 24, 48, 72]
        self.dummy_arr_2d = np.reshape(np.arange(5 * 4 * 3), (5, 12))
Example #5
    def setUp(self):
        self.data_dir = join(get_project_path(), 'crypr', 'tests', 'data')
        self.test_series = pd.Series(list(range(10)))
        self.tx = 4
        self.ty_single = 1
        self.ty_multiple = 2
        self.to_supervised_ty_single = series_to_supervised(
            self.test_series, n_in=self.tx, n_out=self.ty_single, dropnan=True)
        self.to_supervised_ty_multiple = series_to_supervised(
            self.test_series, n_in=self.tx, n_out=self.ty_multiple, dropnan=True)
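Example #6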
from os import makedirs
from os.path import join

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from crypr.build import make_features, data_to_supervised, make_3d
from crypr.util import get_project_path


def main():
    print('Making features from raw data...')

    data_dir = join(get_project_path(), 'data', 'raw')
    output_dir = join(get_project_path(), 'data', 'processed')
    makedirs(output_dir, exist_ok=True)

    coins = ['BTC', 'ETH']
    TARGET = 'close'
    Tx = 72
    Ty = 1
    TEST_SIZE = 0.05

    for SYM in coins:
        raw_data_path = join(data_dir, SYM + '.csv')
        print('Featurizing raw {} data from {}...'.format(SYM, raw_data_path))

        raw_df = pd.read_csv(raw_data_path, index_col=0)

        feature_df = make_features(
            raw_df,
            target_col=TARGET,
            keep_cols=['close', 'high', 'low', 'volumeto', 'volumefrom'],
            ma_lags=[6, 12, 24, 48],
            ma_cols=['close', 'volumefrom', 'volumeto'])

        X, y = data_to_supervised(feature_df, target_ix=-1, Tx=Tx, Ty=Ty)

        num_features = int(X.shape[1] / Tx)
        X = make_3d(X, tx=Tx, num_channels=num_features)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=TEST_SIZE, shuffle=False)

        np.save(arr=X_train, file=join(output_dir, 'X_train_{}'.format(SYM)))
        np.save(arr=X_test, file=join(output_dir, 'X_test_{}'.format(SYM)))
        np.save(arr=y_train, file=join(output_dir, 'y_train_{}'.format(SYM)))
        np.save(arr=y_test, file=join(output_dir, 'y_test_{}'.format(SYM)))
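Example #7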
from os import makedirs
from os.path import join

from crypr.cryptocompare import retrieve_all_data
from crypr.util import get_project_path


def main(hours):
    print('Downloading data from Cryptocompare ...')

    output_dir = join(get_project_path(), 'data', 'raw')
    makedirs(output_dir, exist_ok=True)

    coins = ['BTC', 'ETH']

    for coin in coins:
        print('Retrieving {} coin data from API...'.format(coin))
        raw_df = retrieve_all_data(coin=coin,
                                   num_hours=hours,
                                   comparison_symbol='USD')
        output_path = join(output_dir, coin + '.csv')
        raw_df.to_csv(output_path)
Example #8
from keras.optimizers import Adam
from keras.callbacks import TensorBoard

from crypr.zoo import LSTM_triggerNG, LSTM_WSAEs, build_ae_lstm
from crypr.util import get_project_path

# In[2]:
"""
Import Data.
"""
SYM = 'BTC'
Ty = 1
Tx = 72
num_channels = 1
wt_type = 'haar'
data_dir = os.path.join(get_project_path(), 'data', 'processed')

X_train = np.load(
    os.path.join(data_dir,
                 'X_train_{}_{}_smooth_{}.npy'.format(SYM, wt_type, Tx)))
X_test = np.load(
    os.path.join(data_dir,
                 'X_test_{}_{}_smooth_{}.npy'.format(SYM, wt_type, Tx)))
y_train = np.load(
    os.path.join(data_dir,
                 'y_train_{}_{}_smooth_{}.npy'.format(SYM, wt_type, Tx)))
y_test = np.load(
    os.path.join(data_dir,
                 'y_test_{}_{}_smooth_{}.npy'.format(SYM, wt_type, Tx)))

# In[3]:
Example #9
from scipy import signal
import pywt

from crypr.util import get_project_path
from crypr.build import make_features, data_to_supervised, make_3d
from crypr.transformers import PassthroughTransformer, HaarSmoothTransformer

# In[2]:

SYM = 'BTC'
TARGET = 'close'
Tx = 72
Ty = 1
TEST_SIZE = 0.05

data_path = join(get_project_path(), 'data', 'raw', SYM + '.csv')
data = pd.read_csv(data_path, index_col=0)
"""
Train Test Split.
"""
data_train, data_test = train_test_split(data,
                                         test_size=TEST_SIZE,
                                         shuffle=False)
data_train = data_train.dropna()
data_test = data_test.dropna().iloc[:-1]

print(data_train.shape, data_test.shape)
data_test.head()

# In[3]:
"""
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from crypr.util import get_project_path

import statsmodels
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
import statsmodels.api as sm
import scipy.stats as scs

import arch

# In[98]:
"""
Import Data.
"""
SYM = 'BTC'
data_path = os.path.join(get_project_path(), 'data', 'raw', SYM + '.csv')
data = pd.read_csv(data_path, index_col=0)
data.head()

# In[99]:

# # log returns
# lrets = np.log(df.close/df.close.shift(1)).dropna()
# lrets.plot()
# plt.show()

# percent change
pchange = data['close'].pct_change()
pchange.plot()
plt.show()
Example #11
get_ipython().run_line_magic('matplotlib', 'inline')

from xgboost import XGBRegressor
from xgboost import plot_importance
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error

from crypr.util import get_project_path

# In[2]:

SYM = 'BTC'
Ty = 1
Tx = 72
MAX_LAG = 72
data_dir = join(get_project_path(), 'data', 'processed')

# In[3]:
"""
Import Data.
"""


def load_preprocessed_data(from_dir, sym):
    # Load the train/test arrays saved by the featurization script for one coin.
    X_train = np.load(join(from_dir, 'X_train_{}.npy'.format(sym)))
    y_train = np.load(join(from_dir, 'y_train_{}.npy'.format(sym)))
    X_test = np.load(join(from_dir, 'X_test_{}.npy'.format(sym)))
    y_test = np.load(join(from_dir, 'y_test_{}.npy'.format(sym)))
    return X_train, X_test, y_train, y_test

Example #12
import seaborn as sns

get_ipython().run_line_magic('matplotlib', 'inline')
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import StandardScaler

from keras.layers import Input, LSTM, BatchNormalization, Dense
from keras import Model
from keras.initializers import RandomNormal, Ones, Constant
from keras.optimizers import Adam

from crypr.util import get_project_path

coin = 'BTC'
data_path = join(get_project_path(), 'data', 'processed')

# In[2]:
"""
Import Data.
"""
Ty = 1
Tx = 72
feature_lag = 72

X_train = np.load(join(data_path, 'X_train_{}.npy'.format(coin)))
Y_train = np.load(join(data_path, 'y_train_{}.npy'.format(coin)))
X_test = np.load(join(data_path, 'X_test_{}.npy'.format(coin)))
Y_test = np.load(join(data_path, 'y_test_{}.npy'.format(coin)))

N_FEATURES = X_train.shape[2]
Example #13
"""Run the API by calling this module"""
import connexion
from flask import abort
from os.path import join
import pandas as pd
from crypr.models import SavedKerasTensorflowModel
from crypr.build import make_features, series_to_predict_matrix, make_3d
from crypr.cryptocompare import retrieve_all_data
from crypr.util import get_project_path

models_path = join(get_project_path(), 'models')

model_type = 'lstm_ng'

global btc_model, eth_model
btc_model_filename = '{}_{}.h5'.format(model_type, 'BTC')
btc_model = SavedKerasTensorflowModel(join(models_path, btc_model_filename))
eth_model_filename = '{}_{}.h5'.format(model_type, 'ETH')
eth_model = SavedKerasTensorflowModel(join(models_path, eth_model_filename))


def description():
    return {'message': 'The crypto-predict API'}


def say_hello(name=None):
    return {'message': 'Hello, {}!'.format(name or '')}


def predict(coin=None):
    coin = coin or 'BTC'
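Example #14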
from keras.optimizers import Adam
from keras.callbacks import TensorBoard

from crypr.zoo import LSTM_triggerNG
from crypr.util import get_project_path

# In[2]:
"""
Import Data.
"""
SYM = 'BTC'
Ty = 1
Tx = 72
num_channels = 8
data_dir = join(get_project_path(), 'data', 'processed')

X_train = np.load(join(data_dir, 'X_train_multiple_smooth_{}.npy'.format(SYM)))
X_test = np.load(join(data_dir, 'X_test_multiple_smooth_{}.npy'.format(SYM)))
y_train = np.load(join(data_dir, 'y_train_multiple_smooth_{}.npy'.format(SYM)))
y_test = np.load(join(data_dir, 'y_test_multiple_smooth_{}.npy'.format(SYM)))

# In[3]:

X_train.shape, X_test.shape, y_test.shape, y_train.shape

# In[4]:
"""
Evaluate Dummy Models.
"""
for strategy in ['mean', 'median', 'constant']:
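Example #15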
from os import makedirs
from os.path import join

import numpy as np
from keras.callbacks import TensorBoard
from keras.optimizers import Adam

# NOTE: the RegressionModel import path is assumed; the other crypr imports follow the earlier examples.
from crypr.models import RegressionModel
from crypr.util import get_project_path
from crypr.zoo import build_ae_lstm


def main(epochs, verbose):
    print('Creating and training models for API...')

    input_dir = join(get_project_path(), 'data', 'processed')
    output_dir = join(get_project_path(), 'models')
    makedirs(output_dir, exist_ok=True)

    # Data params
    coins = ['BTC', 'ETH']
    ty = 1
    tx = 72
    num_channels = 1
    wavelet = 'haar'

    # Model params
    batch_size = 32
    learning_rate = .001
    loss = 'mae'
    beta_1 = 0.9
    beta_2 = 0.999
    decay = 0.01
    model_type = 'ae_lstm'

    for coin in coins:
        print('Loading preprocessed {} data from {}'.format(coin, input_dir))

        X_train = np.load(join(input_dir, 'X_train_{}_{}_smooth_{}.npy'.format(coin, wavelet, tx)))
        X_test = np.load(join(input_dir, 'X_test_{}_{}_smooth_{}.npy'.format(coin, wavelet, tx)))
        y_train = np.load(join(input_dir, 'y_train_{}_{}_smooth_{}.npy'.format(coin, wavelet, tx)))
        y_test = np.load(join(input_dir, 'y_test_{}_{}_smooth_{}.npy'.format(coin, wavelet, tx)))

        print('Building model {}...'.format(model_type))
        if model_type == 'ae_lstm':
            estimator = build_ae_lstm(num_inputs=X_train.shape[-1], num_channels=num_channels, num_outputs=ty)
            model = RegressionModel(estimator)
        else:
            raise ValueError('Model type {} is not supported. Exiting.'.format(model_type))
        print(model.estimator.summary())

        tb_log_dir = join(output_dir, 'logs')
        tensorboard = TensorBoard(log_dir=tb_log_dir, histogram_freq=0, batch_size=batch_size,
                                  write_graph=True, write_grads=False, write_images=False)

        opt = Adam(lr=learning_rate, beta_1=beta_1, beta_2=beta_2, decay=decay)
        model.estimator.compile(loss=loss, optimizer=opt)

        print('Training model for {} epochs ...'.format(epochs))
        print('Track model fit with `tensorboard --logdir {}`'.format(tb_log_dir))

        model.fit(
            X_train, [X_train, y_train],
            shuffle=False,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_test, [X_test, y_test]),
            callbacks=[tensorboard],
            verbose=verbose
        )

        model_filename = '{}_smooth_{}x{}_{}_{}.h5'.format(model_type, num_channels, tx, wavelet, coin)
        output_path = join(output_dir, model_filename)
        print('Saving trained model to {}...'.format(output_path))
        model.estimator.save(output_path)
Example #16
from os.path import join
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

from crypr.transformers import MovingAverageTransformer, PercentChangeTransformer, PassthroughTransformer
from crypr.build import data_to_supervised
from crypr.util import get_project_path

coin = 'BTC'
data_path = join(get_project_path(), 'data', 'raw', coin + '.csv')

# In[2]:

data = pd.read_csv(data_path, index_col=0)
print(data.shape)
data.head()

# In[3]:

preprocessing_config = {
    'passthrough': ['close', 'low', 'high'],
    'moving_average': ['close', 'volumeto', 'volumefrom'],
    'target': 'close',
    'tx': 72,
    'ty': 1,
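Example #17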
from os.path import join

import numpy as np

from crypr.cryptocompare import retrieve_all_data
# NOTE: the SimplePreprocessor import path is assumed.
from crypr.preprocessors import SimplePreprocessor
from crypr.util import get_project_path, utc_timestamp_ymd

if __name__ == '__main__':
    np.random.seed(31337)

    SYM = 'ETH'
    LAST_N_HOURS = 14000
    FEATURE_WINDOW = 72
    MOVING_AVERAGE_LAGS = [6, 12, 24, 48, 72]
    TO_TIME = utc_timestamp_ymd(2018, 6, 27)
    TARGET = 'close'
    Tx = 72
    Ty = 1
    TEST_SIZE = 0.05

    project_path = get_project_path()
    test_data_dir = join(project_path, 'crypr', 'tests', 'data')

    data = retrieve_all_data(coin=SYM,
                             num_hours=LAST_N_HOURS,
                             comparison_symbol='USD',
                             end_time=TO_TIME)

    preprocessor = SimplePreprocessor(production=False,
                                      target_col=TARGET,
                                      Tx=Tx,
                                      Ty=Ty,
                                      moving_averages=MOVING_AVERAGE_LAGS)
    X, y = preprocessor.fit(data).transform(data)

    old_shape = X.shape