def setUp(self):
    """Build the shared fixture: configuration, downloaded data and reference values.

    Seeds NumPy's global RNG, downloads hourly data through
    ``retrieve_all_data`` (one long history and one short window for
    prediction) and stores hard-coded reference shapes, samples, model
    hyper-parameters and metrics on ``self``.
    NOTE(review): the reference values are presumably compared against by
    the test methods of this class -- confirm against the callers.
    """
    np.random.seed(31337)

    # Locations on disk.
    self.project_path = get_project_path()
    self.data_dir = join(self.project_path, 'crypr', 'tests', 'data')

    # Data / windowing configuration.
    self.SYM = 'ETH'
    self.LAST_N_HOURS = 14000
    self.FEATURE_WINDOW = 72
    self.MOVING_AVERAGE_LAGS = [6, 12, 24, 48, 72]
    self.TARGET = 'close'
    self.Tx = 72
    self.Ty = 1
    self.TEST_SIZE = 0.05
    self.end_time = utc_timestamp_ymd(2018, 6, 27)

    # Both downloads share the coin, quote currency and end time; only the
    # number of requested hours differs.
    fetch_kwargs = dict(
        coin=self.SYM,
        comparison_symbol='USD',
        end_time=self.end_time,
    )
    self.data = retrieve_all_data(num_hours=self.LAST_N_HOURS, **fetch_kwargs)
    self.predict_data = retrieve_all_data(
        num_hours=self.Tx + self.FEATURE_WINDOW - 1,
        **fetch_kwargs
    )

    # Reference shapes and sample values for the full feature/target sets.
    self.X_shape = (13852, 1224)
    self.y_shape = (13852, 1)
    self.X_sample = 709.48
    self.y_sample = -1.498064809896027

    # Reference shapes and sample values for the train/test partitions.
    self.X_train_shape = (13159, 1224)
    self.X_test_shape = (693, 1224)
    self.y_train_shape = (13159, 1)
    self.y_test_shape = (693, 1)
    self.X_train_sample = 11.41
    self.y_train_sample = 0.0
    self.X_test_sample = 487.58
    self.y_test_sample = 0.9448599618077758

    # Model hyper-parameters (insertion order kept as in the literal form).
    self.parameters = dict(
        objective='reg:linear',
        learning_rate=0.07,
        max_depth=10,
        min_child_weight=4,
        silent=1,
        subsample=0.7,
        colsample_bytree=0.7,
        n_estimators=20,
    )

    # Reference training metrics and prediction value.
    self.train_mae = 0.8953377462440475
    self.train_rmse = 1.4144230033451395
    self.prediction = 1.2296733856201172
def runTest(self):
    """Download data for every configured hour count and run all checks on it.

    For each entry of ``self.num_hours`` the data is fetched with
    ``retrieve_all_data`` and passed to the shape check (which also receives
    the requested hour count) followed by the column, ascending/unique and
    equal-spacing checks.
    """
    for n_hours in self.num_hours:
        frame = retrieve_all_data(
            coin=self.coin,
            num_hours=n_hours,
            comparison_symbol=self.comparison_sym,
            exchange=self.exchange,
            end_time=self.end_to_time,
        )

        # The shape check is the only one that needs the requested size.
        self.shapeCheck(data=frame, num_hours=n_hours)

        # The remaining checks take the data alone; run them in order.
        for check in (self.columnsCheck,
                      self.ascendingUniqueCheck,
                      self.equalSpacingCheck):
            check(data=frame)
def predict(coin=None):
    """Predict upcoming ``close`` values for ``coin`` from recent hourly data.

    The model output is interpreted as a percent change relative to the most
    recent ``close`` value; each prediction is converted back to a price and
    paired with a timestamp one hour after the previous one, starting one
    hour after the last downloaded row.

    Args:
        coin: Coin symbol. Falls back to ``'BTC'`` when falsy. Only ``'ETH'``
            and ``'BTC'`` have a model.

    Returns:
        dict with two equally long lists: ``prediction`` (predicted values)
        and ``time`` (matching timestamps).

    Raises:
        Whatever ``abort(404, ...)`` raises when ``coin`` has no model.
        NOTE(review): assumes ``abort`` is Flask's, which accepts a
        ``description`` keyword -- confirm against the file's imports.
    """
    coin = coin or 'BTC'

    # Resolve the model before touching the network so that an unsupported
    # coin yields the 404 immediately instead of triggering a download (and
    # possibly an unrelated failure) first.
    if coin == 'ETH':
        model = eth_model
    elif coin == 'BTC':
        model = btc_model
    else:
        abort(404, description='No prediction model is available for coin {!r}.'.format(coin))

    wavelet = 'haar'
    Tx = 72
    Ty = 1
    target = 'close'

    cryptocompare_data = retrieve_all_data(coin, Tx + 1)

    preprocessor = DWTSmoothPreprocessor(
        production=True,
        target_col=target,
        Tx=Tx,
        Ty=Ty,
        wavelet=wavelet,
        name='CryptoPredict_DWTSmoothPreprocessor_{}'.format(coin),
    )
    preprocessed_data = preprocessor.fit(cryptocompare_data).transform(cryptocompare_data)

    with model.graph.as_default():
        prediction = model.estimator.predict(preprocessed_data)

    def parse_keras_prediction(keras_prediction):
        """Handles multi-output Keras models"""
        if len(keras_prediction) == 2:
            return keras_prediction[1][0]
        return keras_prediction[0]

    parsed_prediction = parse_keras_prediction(prediction)

    last_value = cryptocompare_data[target].iloc[-1]
    last_time = cryptocompare_data['timestamp'].iloc[-1]

    # Predictions are percent changes relative to the last observed value.
    prediction_val = [
        last_value + pred / 100 * last_value for pred in parsed_prediction
    ]
    # One timestamp per prediction, spaced one hour apart after the last row.
    time_val = [
        last_time + pd.Timedelta(hours=step)
        for step in range(1, len(parsed_prediction) + 1)
    ]

    return dict(prediction=prediction_val, time=time_val)
def main(hours):
    """Download BTC and ETH data from Cryptocompare and save each as a CSV.

    Files are written to ``<project path>/data/raw/<COIN>.csv``; the
    directory is created when missing.

    Args:
        hours: Number of hours of data to request for each coin.
    """
    print('Downloading data from Cryptocompare ...')

    raw_dir = join(get_project_path(), 'data', 'raw')
    makedirs(raw_dir, exist_ok=True)

    for symbol in ['BTC', 'ETH']:
        print('Retrieving {} coin data from API...'.format(symbol))
        csv_path = join(raw_dir, symbol + '.csv')
        frame = retrieve_all_data(
            coin=symbol,
            num_hours=hours,
            comparison_symbol='USD',
        )
        frame.to_csv(csv_path)
def main(hours):
    """Download BTC and ETH data from Cryptocompare into ``data/raw``.

    Configures logging, loads the ``.env`` file located by ``find_dotenv``,
    and writes one ``<COIN>.csv`` per coin into ``data/raw`` under the
    directory that contains the ``.env`` file.

    Args:
        hours: Number of hours of data to request for each coin.
    """
    log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_fmt)
    logger = logging.getLogger(__name__)

    # Locate the .env file once and reuse the result for loading and for
    # deriving the project directory.
    dotenv_path = find_dotenv()
    load_dotenv(dotenv_path)
    project_path = os.path.dirname(dotenv_path)
    if not project_path:
        # With an empty project path, '{}/data/raw'.format('') would point at
        # '/data/raw' on the filesystem root; os.path.join keeps the output
        # relative to the current working directory instead.
        logger.warning('No .env file found; writing output relative to the '
                       'current working directory.')

    logger.info('Downloading data from Cryptocompare ...')

    output_path = os.path.join(project_path, 'data', 'raw')
    if not os.path.isdir(output_path):
        logger.info('Making output directory...')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_path, exist_ok=True)

    coins = ['BTC', 'ETH']
    for coin in coins:
        coin_data = cryptocompare.retrieve_all_data(
            coin=coin,
            num_hours=hours,
            comparison_symbol='USD',
        )
        coin_output_path = os.path.join(output_path, '{}.csv'.format(coin))
        coin_data.to_csv(coin_output_path)
def predict(coin=None):
    """Predict upcoming ``close`` values for ``coin`` from recent hourly data.

    Downloads recent data, engineers features (including moving averages),
    arranges them into the model input and runs the coin's model. The model
    output is interpreted as a percent change relative to the most recent
    ``close`` value; each prediction is converted back to a price and paired
    with a timestamp one hour after the previous one, starting one hour
    after the last downloaded row.

    Args:
        coin: Coin symbol. Falls back to ``'BTC'`` when falsy. Only ``'ETH'``
            and ``'BTC'`` have a model.

    Returns:
        dict with two equally long lists: ``prediction`` (predicted values)
        and ``time`` (matching timestamps).

    Raises:
        Whatever ``abort(404, ...)`` raises when ``coin`` has no model.
        NOTE(review): assumes ``abort`` is Flask's, which accepts a
        ``description`` keyword -- confirm against the file's imports.
    """
    coin = coin or 'BTC'

    # Resolve the model before touching the network so that an unsupported
    # coin yields the 404 immediately instead of triggering a download (and
    # possibly an unrelated failure) first.
    if coin == 'ETH':
        model = eth_model
    elif coin == 'BTC':
        model = btc_model
    else:
        abort(404, description='No prediction model is available for coin {!r}.'.format(coin))

    Tx = 72
    target = 'close'

    # Tx + 1 rows plus 48 extra hours.
    # NOTE(review): the 48 presumably covers the longest moving-average lag
    # below -- confirm.
    cryptocompare_data = retrieve_all_data(coin, Tx + 1 + 48)

    preprocessed_data = make_features(
        cryptocompare_data,
        target_col=target,
        keep_cols=['close', 'high', 'low', 'volumeto', 'volumefrom'],
        ma_lags=[6, 12, 24, 48],
        ma_cols=['close', 'volumefrom', 'volumeto'],
    )
    time_series_data = series_to_predict_matrix(preprocessed_data, Tx)

    # The second axis is divided by Tx to obtain the channel count.
    n_features = time_series_data.shape[1] // Tx
    model_input_data = make_3d(arr=time_series_data, tx=Tx, num_channels=n_features)

    with model.graph.as_default():
        prediction = model.estimator.predict(model_input_data)

    def parse_keras_prediction(keras_prediction):
        """Handles multi-output Keras models"""
        if len(keras_prediction) == 2:
            return keras_prediction[1][0]
        return keras_prediction[0]

    parsed_prediction = parse_keras_prediction(prediction)

    last_value = cryptocompare_data[target].iloc[-1]
    last_time = cryptocompare_data['timestamp'].iloc[-1]

    # Predictions are percent changes relative to the last observed value.
    prediction_val = [last_value + pred/100*last_value for pred in parsed_prediction]
    # One timestamp per prediction, spaced one hour apart after the last row.
    time_val = [
        last_time + pd.Timedelta(hours=step)
        for step in range(1, len(parsed_prediction) + 1)
    ]

    return dict(prediction=prediction_val, time=time_val)
# Fixture configuration.
SYM = 'ETH'
LAST_N_HOURS = 14000
FEATURE_WINDOW = 72
MOVING_AVERAGE_LAGS = [6, 12, 24, 48, 72]
TO_TIME = utc_timestamp_ymd(2018, 6, 27)
TARGET = 'close'
Tx = 72
Ty = 1
TEST_SIZE = 0.05

project_path = get_project_path()
test_data_dir = join(project_path, 'crypr', 'tests', 'data')

# Download the raw data up to the fixed end time.
data = retrieve_all_data(
    coin=SYM,
    num_hours=LAST_N_HOURS,
    comparison_symbol='USD',
    end_time=TO_TIME,
)

# Fit the preprocessor on the data and transform that same data.
preprocessor = SimplePreprocessor(
    production=False,
    target_col=TARGET,
    Tx=Tx,
    Ty=Ty,
    moving_averages=MOVING_AVERAGE_LAGS,
)
X, y = preprocessor.fit(data).transform(data)

# Flatten the 3-D feature array to 2-D by merging its last two axes.
old_shape = X.shape
new_shape = (old_shape[0], old_shape[1] * old_shape[2])

# Arguments are passed positionally: the ``newshape`` keyword is deprecated
# in recent NumPy releases and the array argument is positional-only there,
# while the positional form works on every NumPy version.
X = pd.DataFrame(np.reshape(X, new_shape),
                 columns=preprocessor.engineered_columns)
print('X shape: {}'.format(X.shape))
# Fixture configuration.
SYM = 'ETH'
LAST_N_HOURS = 14000
FEATURE_WINDOW = 72
MOVING_AVERAGE_LAGS = [6, 12, 24, 48, 72]
TARGET = 'close'
Tx = 72
Ty = 1
TEST_SIZE = 0.05

# Load environment variables and take the .env file's directory as the
# project path.
load_dotenv(find_dotenv())
project_path = os.path.dirname(find_dotenv())

# Download the raw data up to a fixed end time. The end time is built by
# converting 2018-06-27 to a datetime64, viewing it as an unsigned integer,
# dividing by 1e6 and casting to uint32.
# NOTE(review): presumably this turns microsecond ticks into epoch seconds --
# confirm against what retrieve_all_data expects.
data = retrieve_all_data(
    coin=SYM,
    num_hours=LAST_N_HOURS,
    comparison_symbol='USD',
    end_time=(np.datetime64(datetime.datetime(2018, 6, 27)).astype('uint64') / 1e6).astype('uint32'),
)

# Fit the preprocessor on the data and transform that same data.
preprocessor = SimplePreprocessor(
    production=False,
    target_col=TARGET,
    Tx=Tx,
    Ty=Ty,
    moving_averages=MOVING_AVERAGE_LAGS,
    name='unit_test',
)
X, y = preprocessor.fit(data).transform(data)

# Flatten the 3-D feature array to 2-D by merging its last two axes.
old_shape = X.shape
new_shape = (old_shape[0], old_shape[1] * old_shape[2])

# Arguments are passed positionally: the ``newshape`` keyword is deprecated
# in recent NumPy releases and the array argument is positional-only there,
# while the positional form works on every NumPy version.
X = pd.DataFrame(np.reshape(X, new_shape),
                 columns=preprocessor.engineered_columns)