Ejemplo n.º 1
0
    def setUp(self):
        """Seed NumPy, download the fixture data and record reference values.

        The shape, sample and metric attributes set here are presumably the
        expected values that the test methods compare against (those methods
        are not visible from this block).
        """
        # Fixed seed so any NumPy randomness used by the tests is repeatable.
        np.random.seed(31337)

        project_root = get_project_path()
        self.project_path = project_root
        self.data_dir = join(project_root, 'crypr', 'tests', 'data')

        # Scenario configuration.
        self.SYM = 'ETH'
        self.LAST_N_HOURS = 14000
        self.FEATURE_WINDOW = 72
        self.MOVING_AVERAGE_LAGS = [6, 12, 24, 48, 72]
        self.TARGET = 'close'
        self.Tx, self.Ty = 72, 1
        self.TEST_SIZE = 0.05
        self.end_time = utc_timestamp_ymd(2018, 6, 27)

        # Both downloads share coin, quote currency and end time; only the
        # number of requested hours differs.
        shared_query = dict(coin=self.SYM,
                            comparison_symbol='USD',
                            end_time=self.end_time)
        self.data = retrieve_all_data(num_hours=self.LAST_N_HOURS,
                                      **shared_query)
        predict_hours = self.Tx + self.FEATURE_WINDOW - 1
        self.predict_data = retrieve_all_data(num_hours=predict_hours,
                                              **shared_query)

        # Reference row/column counts: full set, train split, test split.
        total_rows, train_rows, test_rows = 13852, 13159, 693
        n_columns = 1224

        self.X_shape = (total_rows, n_columns)
        self.y_shape = (total_rows, 1)

        self.X_sample = 709.48
        self.y_sample = -1.498064809896027

        self.X_train_shape = (train_rows, n_columns)
        self.X_test_shape = (test_rows, n_columns)
        self.y_train_shape = (train_rows, 1)
        self.y_test_shape = (test_rows, 1)

        self.X_train_sample = 11.41
        self.y_train_sample = 0.0

        self.X_test_sample = 487.58
        self.y_test_sample = 0.9448599618077758

        # Model hyper-parameters stored for the tests.
        self.parameters = {
            'objective': 'reg:linear',
            'learning_rate': .07,
            'max_depth': 10,
            'min_child_weight': 4,
            'silent': 1,
            'subsample': 0.7,
            'colsample_bytree': 0.7,
            'n_estimators': 20,
        }

        # Reference metric and prediction values.
        self.train_mae = 0.8953377462440475
        self.train_rmse = 1.4144230033451395
        self.prediction = 1.2296733856201172
Ejemplo n.º 2
0
 def runTest(self):
     """Download data for each configured hour count and run every check."""
     for requested_hours in self.num_hours:
         query = dict(
             coin=self.coin,
             num_hours=requested_hours,
             comparison_symbol=self.comparison_sym,
             exchange=self.exchange,
             end_time=self.end_to_time,
         )
         frame = retrieve_all_data(**query)

         # The shape check is the only one that needs the requested hours.
         self.shapeCheck(data=frame, num_hours=requested_hours)

         # The remaining checks only look at the downloaded data.
         data_only_checks = (
             self.columnsCheck,
             self.ascendingUniqueCheck,
             self.equalSpacingCheck,
         )
         for check in data_only_checks:
             check(data=frame)
Ejemplo n.º 3
0
def predict(coin=None):
    """Predict upcoming hourly prices for ``coin`` using its loaded model.

    The coin is validated before any data is downloaded, so an unsupported
    symbol is rejected with ``abort(404)`` without first issuing an API
    request and running the preprocessor.

    Args:
        coin: Coin symbol, ``'BTC'`` or ``'ETH'``. Falsy values fall back to
            ``'BTC'``.

    Returns:
        dict with two lists of equal length: ``prediction`` (the last observed
        ``close`` value adjusted by each model output, which is treated as a
        percent change) and ``time`` (the last timestamp plus 1, 2, ... hours).
    """
    coin = coin or 'BTC'
    wavelet = 'haar'
    Tx = 72
    Ty = 1
    target = 'close'

    # Pick the model first: there is no point fetching and preprocessing data
    # for a coin that has no model.
    if coin == 'ETH':
        model = eth_model
    elif coin == 'BTC':
        model = btc_model
    else:
        # FIXME: More descriptive error
        # NOTE(review): abort is expected to raise (as Flask's does) -- confirm.
        abort(404)

    cryptocompare_data = retrieve_all_data(coin, Tx + 1)

    preprocessor = DWTSmoothPreprocessor(
        production=True,
        target_col=target,
        Tx=Tx,
        Ty=Ty,
        wavelet=wavelet,
        name='CryptoPredict_DWTSmoothPreprocessor_{}'.format(coin))
    preprocessed_data = preprocessor.fit(cryptocompare_data).transform(
        cryptocompare_data)

    with model.graph.as_default():
        prediction = model.estimator.predict(preprocessed_data)

    def parse_keras_prediction(keras_prediction):
        """Handles multi-output Keras models"""
        # A length-2 result is taken to be [output_0, output_1]; the first row
        # of the second output is used. Anything else is treated as a single
        # output whose first row is used.
        if len(keras_prediction) == 2:
            return keras_prediction[1][0]
        return keras_prediction[0]

    parsed_prediction = parse_keras_prediction(prediction)

    last_value = cryptocompare_data[target].iloc[-1]
    last_time = cryptocompare_data['timestamp'].iloc[-1]

    # Model outputs are percent changes relative to the last observed value.
    prediction_val = [
        last_value + pred / 100 * last_value for pred in parsed_prediction
    ]
    # One timestamp per predicted step, starting one hour after the last row.
    time_val = [
        last_time + pd.Timedelta(hours=step)
        for step in range(1, len(parsed_prediction) + 1)
    ]
    return dict(prediction=prediction_val, time=time_val)
Ejemplo n.º 4
0
def main(hours):
    """Download hourly BTC and ETH data and write one CSV file per coin.

    Files are written to ``data/raw`` under the path returned by
    ``get_project_path()``; the directory is created if it is missing.

    Args:
        hours: Forwarded to ``retrieve_all_data`` as ``num_hours``.
    """
    print('Downloading data from Cryptocompare ...')

    raw_dir = join(get_project_path(), 'data', 'raw')
    makedirs(raw_dir, exist_ok=True)

    for symbol in ('BTC', 'ETH'):
        print('Retrieving {} coin data from API...'.format(symbol))
        frame = retrieve_all_data(
            coin=symbol,
            num_hours=hours,
            comparison_symbol='USD',
        )
        # One file per coin, named after its symbol.
        csv_name = symbol + '.csv'
        frame.to_csv(join(raw_dir, csv_name))
Ejemplo n.º 5
0
def main(hours):
    """Download hourly BTC and ETH data and write one CSV file per coin.

    The project directory is taken to be the directory containing the
    ``.env`` file located by ``find_dotenv()``. Files are written to
    ``data/raw`` beneath it.

    Args:
        hours: Forwarded to ``cryptocompare.retrieve_all_data`` as
            ``num_hours``.
    """
    log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_fmt)

    # Locate the .env file once and reuse the path for both loading and
    # deriving the project directory.
    dotenv_path = find_dotenv()
    load_dotenv(dotenv_path)
    project_path = os.path.dirname(dotenv_path)

    logger = logging.getLogger(__name__)
    logger.info('Downloading data from Cryptocompare ...')

    # os.path.join keeps the path relative when project_path is empty (no
    # .env found); plain string formatting would have produced '/data/raw'
    # at the filesystem root.
    output_path = os.path.join(project_path, 'data', 'raw')
    if not os.path.isdir(output_path):
        logger.info('Making output directory...')
    # exist_ok avoids the race between checking for and creating the directory.
    os.makedirs(output_path, exist_ok=True)

    coins = ['BTC', 'ETH']

    for coin in coins:
        coin_data = cryptocompare.retrieve_all_data(coin=coin,
                                                    num_hours=hours,
                                                    comparison_symbol='USD')
        coin_output_path = os.path.join(output_path, '{}.csv'.format(coin))
        coin_data.to_csv(coin_output_path)
Ejemplo n.º 6
0
def predict(coin=None):
    """Predict upcoming hourly prices for ``coin`` using its loaded model.

    The coin is validated before any data is downloaded, so an unsupported
    symbol is rejected with ``abort(404)`` without first issuing an API
    request and building features.

    Args:
        coin: Coin symbol, ``'BTC'`` or ``'ETH'``. Falsy values fall back to
            ``'BTC'``.

    Returns:
        dict with two lists of equal length: ``prediction`` (the last observed
        ``close`` value adjusted by each model output, which is treated as a
        percent change) and ``time`` (the last timestamp plus 1, 2, ... hours).
    """
    coin = coin or 'BTC'
    Tx = 72
    target = 'close'

    # Pick the model first: there is no point fetching data and engineering
    # features for a coin that has no model.
    if coin == 'ETH':
        model = eth_model
    elif coin == 'BTC':
        model = btc_model
    else:
        # FIXME: More descriptive error
        # NOTE(review): abort is expected to raise (as Flask's does) -- confirm.
        abort(404)

    # NOTE(review): the extra 48 rows presumably cover the longest
    # moving-average lag below -- confirm against make_features.
    cryptocompare_data = retrieve_all_data(coin, Tx + 1 + 48)
    preprocessed_data = make_features(
        cryptocompare_data,
        target_col=target,
        keep_cols=['close', 'high', 'low', 'volumeto', 'volumefrom'],
        ma_lags=[6, 12, 24, 48],
        ma_cols=['close', 'volumefrom', 'volumeto'])
    time_series_data = series_to_predict_matrix(preprocessed_data, Tx)
    # Columns hold Tx time steps per feature, so the feature count is the
    # column count divided by Tx.
    n_features = int(time_series_data.shape[1] / Tx)
    model_input_data = make_3d(arr=time_series_data, tx=Tx,
                               num_channels=n_features)

    with model.graph.as_default():
        prediction = model.estimator.predict(model_input_data)

    def parse_keras_prediction(keras_prediction):
        """Handles multi-output Keras models"""
        # A length-2 result is taken to be [output_0, output_1]; the first row
        # of the second output is used. Anything else is treated as a single
        # output whose first row is used.
        if len(keras_prediction) == 2:
            return keras_prediction[1][0]
        return keras_prediction[0]

    parsed_prediction = parse_keras_prediction(prediction)

    last_value = cryptocompare_data[target].iloc[-1]
    last_time = cryptocompare_data['timestamp'].iloc[-1]

    # Model outputs are percent changes relative to the last observed value.
    prediction_val = [
        last_value + pred / 100 * last_value for pred in parsed_prediction
    ]
    # One timestamp per predicted step, starting one hour after the last row.
    time_val = [
        last_time + pd.Timedelta(hours=step)
        for step in range(1, len(parsed_prediction) + 1)
    ]
    return dict(prediction=prediction_val, time=time_val)
Ejemplo n.º 7
0
    # Configuration for the download and preprocessing below.
    # FEATURE_WINDOW and TEST_SIZE are not used in the visible portion.
    SYM = 'ETH'
    LAST_N_HOURS = 14000
    FEATURE_WINDOW = 72
    MOVING_AVERAGE_LAGS = [6, 12, 24, 48, 72]
    # Fixed end time for the download.
    # NOTE(review): presumably a UTC timestamp for 2018-06-27 -- confirm
    # against utc_timestamp_ymd.
    TO_TIME = utc_timestamp_ymd(2018, 6, 27)
    TARGET = 'close'
    Tx = 72
    Ty = 1
    TEST_SIZE = 0.05

    # test_data_dir is not used in the visible portion.
    project_path = get_project_path()
    test_data_dir = join(project_path, 'crypr', 'tests', 'data')

    # Download LAST_N_HOURS of SYM data quoted in USD, ending at TO_TIME.
    data = retrieve_all_data(coin=SYM,
                             num_hours=LAST_N_HOURS,
                             comparison_symbol='USD',
                             end_time=TO_TIME)

    # Fit the preprocessor on the downloaded data and transform that same
    # data into features X and targets y.
    preprocessor = SimplePreprocessor(production=False,
                                      target_col=TARGET,
                                      Tx=Tx,
                                      Ty=Ty,
                                      moving_averages=MOVING_AVERAGE_LAGS)
    X, y = preprocessor.fit(data).transform(data)

    # X is indexed as a 3-D array here; merge its last two axes so each
    # sample becomes one flat row, labelled with the preprocessor's
    # engineered column names.
    old_shape = X.shape
    new_shape = (old_shape[0], old_shape[1] * old_shape[2])
    X = pd.DataFrame(np.reshape(a=X, newshape=new_shape),
                     columns=preprocessor.engineered_columns)

    print('X shape: {}'.format(X.shape))
Ejemplo n.º 8
0
    # Configuration for the download and preprocessing below.
    # FEATURE_WINDOW and TEST_SIZE are not used in the visible portion.
    SYM = 'ETH'
    LAST_N_HOURS = 14000
    FEATURE_WINDOW=72
    MOVING_AVERAGE_LAGS = [6, 12, 24, 48, 72]
    TARGET = 'close'
    Tx = 72
    Ty = 1
    TEST_SIZE = 0.05

    # The project directory is the one containing the .env file found by
    # find_dotenv(); project_path is not used in the visible portion.
    load_dotenv(find_dotenv())
    project_path = os.path.dirname(find_dotenv())

    # Download LAST_N_HOURS of SYM data quoted in USD with a fixed end time.
    # The end_time expression converts 2018-06-27 to an integer: a datetime64
    # built from a datetime has microsecond precision, so dividing its integer
    # value by 1e6 gives epoch seconds, which are then cast to uint32.
    # NOTE(review): the naive datetime is presumably meant as UTC -- confirm.
    data = retrieve_all_data(
        coin=SYM,
        num_hours=LAST_N_HOURS,
        comparison_symbol='USD',
        end_time=(np.datetime64(datetime.datetime(2018, 6, 27)).astype('uint64') / 1e6).astype('uint32'))

    # Fit the preprocessor on the downloaded data and transform that same
    # data into features X and targets y.
    preprocessor = SimplePreprocessor(
        production=False,
        target_col=TARGET,
        Tx=Tx,
        Ty=Ty,
        moving_averages=MOVING_AVERAGE_LAGS,
        name='unit_test')
    X, y = preprocessor.fit(data).transform(data)

    # X is indexed as a 3-D array here; merge its last two axes so each
    # sample becomes one flat row, labelled with the preprocessor's
    # engineered column names.
    old_shape = X.shape
    new_shape = (old_shape[0], old_shape[1] * old_shape[2])
    X = pd.DataFrame(np.reshape(a=X, newshape=new_shape), columns=preprocessor.engineered_columns)