def get_exchange_history_window(self,
                                    exchange_name,
                                    assets,
                                    end_dt,
                                    bar_count,
                                    frequency,
                                    field,
                                    data_frequency,
                                    ffill=True):
        """
        Load a history window from the bundle registered for an exchange
        and pass it through `resample_history_df`.

        Parameters
        ----------
        exchange_name: str
            Key used to look up the bundle in `self.exchange_bundles`.
        assets: list[TradingPair]
        end_dt: datetime
            Last bar of the window. It is floored to the day when
            `data_frequency` is 'minute' but the frequency resolved by
            `get_frequency` is 'daily'.
        bar_count: int
            Number of candles requested; multiplied by the candle size
            returned by `get_frequency` before querying the bundle.
        frequency: str
        field: str
        data_frequency: str
        ffill: bool
            Not used by this method.

        Returns
        -------
        DataFrame

        """
        # TODO: verify that the exchange supports the timeframe
        bundle = self.exchange_bundles[exchange_name]  # type: ExchangeBundle

        freq, candle_size, _unit, bundle_frequency = get_frequency(
            frequency, data_frequency, supported_freqs=['T', 'D'])
        total_bars = candle_size * bar_count

        reads_daily_from_minute = (
            data_frequency == 'minute' and bundle_frequency == 'daily'
        )
        if reads_daily_from_minute:
            end_dt = end_dt.floor('1D')

        window = bundle.get_history_window_series_and_load(
            assets=assets,
            end_dt=end_dt,
            bar_count=total_bars,
            field=field,
            data_frequency=bundle_frequency,
            algo_end_dt=self._last_available_session,
        )

        window_start = get_start_dt(end_dt, total_bars, bundle_frequency)
        return resample_history_df(
            pd.DataFrame(window), freq, field, window_start)
Exemple #2
0
    def test_ingest_candles(self):
        """
        Fetch 100 one-minute candles for 'iot_btc' on bitfinex ending
        2017-10-20, ingest them into the exchange bundle, then read the
        'close' window back and print it.

        Nothing is asserted; the printed frame is meant for manual
        inspection.
        """
        exchange_name = 'bitfinex'
        data_frequency = 'minute'
        ohlcv_fields = ['open', 'high', 'low', 'close', 'volume']

        exchange = get_exchange(exchange_name)
        bundle = ExchangeBundle(exchange)
        assets = [exchange.get_asset('iot_btc')]

        end_dt = pd.to_datetime('2017-10-20', utc=True)
        bar_count = 100
        start_dt = get_start_dt(end_dt, bar_count, data_frequency)

        candles = exchange.get_candles(
            assets=assets,
            start_dt=start_dt,
            end_dt=end_dt,
            bar_count=bar_count,
            freq='1T',
        )

        writer = bundle.get_writer(start_dt, end_dt, data_frequency)
        for asset in assets:
            asset_candles = candles[asset]
            dates = [candle['last_traded'] for candle in asset_candles]
            values = {
                name: [candle[name] for candle in asset_candles]
                for name in ohlcv_fields
            }

            # Re-index on the calendar periods of the window and carry the
            # previous value forward into periods without a candle.
            periods = bundle.get_calendar_periods_range(
                start_dt, end_dt, data_frequency)
            ohlcv_df = pd.DataFrame(values, index=dates)
            ohlcv_df = ohlcv_df.loc[periods].fillna(method='ffill')

            # TODO: why do I get an extra bar?
            bundle.ingest_df(
                ohlcv_df=ohlcv_df,
                data_frequency=data_frequency,
                asset=asset,
                writer=writer,
                empty_rows_behavior='raise',
                duplicates_behavior='raise',
            )

        bundle_series = bundle.get_history_window_series(
            assets=assets,
            end_dt=end_dt,
            bar_count=bar_count,
            field='close',
            data_frequency=data_frequency,
            reset_reader=True,
        )
        print('\n' + df_to_string(pd.DataFrame(bundle_series)))
Exemple #3
0
    def test_validate_data(self):
        """
        Print, side by side, the 'close' prices stored in the bundle and
        those rebuilt from candles fetched from the exchange for 'iot_btc'
        on bitfinex, for the 60 minutes ending 2017-9-2 1:00 UTC.

        Nothing is asserted; the printed frame is meant for manual
        inspection.
        """
        exchange_name = 'bitfinex'
        data_frequency = 'minute'

        exchange = get_exchange(exchange_name)
        exchange_bundle = ExchangeBundle(exchange)
        assets = [exchange.get_asset('iot_btc')]

        end_dt = pd.to_datetime('2017-9-2 1:00', utc=True)
        bar_count = 60

        # The bundle window requested is five times longer than the
        # candle window it is compared against.
        bundle_series = exchange_bundle.get_history_window_series(
            assets=assets,
            end_dt=end_dt,
            bar_count=bar_count * 5,
            field='close',
            data_frequency='minute',
        )
        candles = exchange.get_candles(
            assets=assets,
            end_dt=end_dt,
            bar_count=bar_count,
            freq='1T',
        )
        start_dt = get_start_dt(end_dt, bar_count, data_frequency)

        def comparison_frame(asset):
            # One frame per asset, indexed by (asset symbol, last_traded).
            bundle_df = pd.DataFrame(
                data={'bundle_price': bundle_series[asset]},
                index=bundle_series[asset].index,
            )
            exchange_series = exchange.get_series_from_candles(
                candles=candles[asset],
                start_dt=start_dt,
                end_dt=end_dt,
                data_frequency=data_frequency,
                field='close',
            )
            exchange_df = pd.DataFrame(
                data={'exchange_price': exchange_series},
                index=exchange_series.index,
            )

            # Left join: keep every exchange bar, attach the bundle price
            # where the bundle has a matching timestamp.
            joined = exchange_df.join(bundle_df, how='left')
            joined['last_traded'] = joined.index
            joined['asset'] = asset.symbol
            joined.set_index(['asset', 'last_traded'], inplace=True)
            return joined

        frames = [comparison_frame(asset) for asset in assets]

        combined = pd.concat(frames)
        print('\n' + df_to_string(combined))
Exemple #4
0
    def get_history_window_with_bundle(self,
                                       assets,
                                       end_dt,
                                       bar_count,
                                       frequency,
                                       field,
                                       data_frequency=None,
                                       ffill=True,
                                       force_auto_ingest=False):
        """
        Return a history window read from the bundle, extended with
        candles fetched directly from the exchange for every asset whose
        bundled data is missing or stops before `end_dt`.

        Parameters
        ----------
        assets : list[TradingPair]
            The assets whose data is desired.

        end_dt: datetime
            The date of the last bar.

        bar_count: int
            The number of bars desired.

        frequency: string
            "1d" or "1m"

        field: string
            The desired field of the asset.

        data_frequency: string
            The frequency of the data to query; i.e. whether the data is
            'daily' or 'minute' bars.

        ffill: boolean
            Currently not used by this method.
            # TODO: fill how?

        force_auto_ingest: boolean
            Forwarded to the bundle's
            `get_history_window_series_and_load`.

        Returns
        -------
        DataFrame
            The output of `resample_history_df`, with rows containing
            missing values dropped.

        """
        # TODO: this function needs some work,
        # we're currently using it just for benchmark data
        freq, candle_size, _, data_frequency = get_frequency(
            frequency, data_frequency, supported_freqs=['T', 'D'])
        adj_bar_count = candle_size * bar_count
        try:
            loaded = self.bundle.get_history_window_series_and_load(
                assets=assets,
                end_dt=end_dt,
                bar_count=adj_bar_count,
                field=field,
                data_frequency=data_frequency,
                force_auto_ingest=force_auto_ingest)

        except (PricingDataNotLoadedError, NoDataAvailableOnExchange):
            loaded = dict()

        # The bundle may hand back a dict of Series or a DataFrame. Work on
        # a plain dict so that an extended (longer) series can replace an
        # entry without being re-aligned to a pre-existing index.
        series = {key: loaded[key] for key in loaded}

        for asset in assets:
            bundled = series.get(asset)
            has_bundled = bundled is not None and len(bundled) > 0
            if has_bundled and bundled.index[-1] >= end_dt:
                continue

            # Adding bars too recent to be contained in the consolidated
            # exchanges bundles. We go directly against the exchange
            # to retrieve the candles.
            if has_bundled:
                # Resume right after the last bundled bar and remember its
                # value so that leading gaps can be filled from it.
                trailing_dt = \
                    bundled.index[-1] + get_delta(1, data_frequency)
                last_value = bundled.iloc[-1]
            else:
                trailing_dt = get_start_dt(
                    end_dt, adj_bar_count, data_frequency)
                last_value = np.nan

            # The get_history method supports multiple asset
            # Use the original frequency to let each api optimize
            # the size of result sets
            trailing_bars = get_periods(trailing_dt, end_dt, freq)
            candles = self.get_candles(
                freq=freq,
                assets=asset,
                end_dt=end_dt,
                # Never request more than 500 bars in one call.
                bar_count=min(trailing_bars, 500),
            )

            # Create a series with the common data_frequency, ffill
            # missing values
            candle_series = self.get_series_from_candles(
                candles=candles,
                start_dt=trailing_dt,
                end_dt=end_dt,
                data_frequency=data_frequency,
                field=field,
                previous_value=last_value)

            if has_bundled:
                # Concatenation returns a new object; it has to be stored
                # back for the trailing bars to be part of the result.
                series[asset] = pd.concat([bundled, candle_series])
            else:
                series[asset] = candle_series

        df = resample_history_df(pd.DataFrame(series), freq, field)
        # TODO: consider this more carefully
        df.dropna(inplace=True)

        return df
Exemple #5
0
    def get_history_window_series(self,
                                  assets,
                                  end_dt,
                                  bar_count,
                                  field,
                                  data_frequency,
                                  reset_reader=False):
        """
        Read one field of a history window from the bundle reader.

        Parameters
        ----------
        assets: list[TradingPair]
        end_dt: pd.Timestamp
            Last bar of the window.
        bar_count: int
            Number of bars used to derive the window start.
        field: str
        data_frequency: str
        reset_reader: bool
            When True, drop the cached reader for `data_frequency` and
            obtain a new one before reading.

        Returns
        -------
        dict
            Maps each asset to a pd.Series of `field` values indexed by
            the calendar periods of the window.

        Raises
        ------
        PricingDataNotLoadedError
            If no reader is available, or if the requested range of an
            asset is not in the bundle.
        DataCorruptionError
            If the reader returns no arrays for an asset.
        PricingDataValueError
            If the values read cannot be combined with the calendar
            periods into a Series.

        """
        start_dt = get_start_dt(end_dt, bar_count, data_frequency, False)
        start_dt, _ = self.get_adj_dates(
            start_dt, end_dt, assets, data_frequency
        )

        # This is an attempt to resolve some caching with the reader
        # when auto-ingesting data.
        # TODO: needs more work
        reader = self.get_reader(data_frequency)
        # Without a reader there is no cache entry to evict; skip the reset
        # so that the missing-data error below is raised instead of an
        # AttributeError on `None._rootdir`.
        if reset_reader and reader is not None:
            del self._readers[reader._rootdir]
            reader = self.get_reader(data_frequency)

        if reader is None:
            symbols = [asset.symbol for asset in assets]
            raise PricingDataNotLoadedError(
                field=field,
                first_trading_day=min([asset.start_date for asset in assets]),
                exchange=self.exchange_name,
                symbols=symbols,
                symbol_list=','.join(symbols),
                data_frequency=data_frequency,
                start_dt=start_dt,
                end_dt=end_dt
            )

        series = dict()
        for asset in assets:
            asset_start_dt, _ = self.get_adj_dates(
                start_dt, end_dt, assets, data_frequency
            )
            in_bundle = range_in_bundle(
                asset, asset_start_dt, end_dt, reader
            )
            if not in_bundle:
                raise PricingDataNotLoadedError(
                    field=field,
                    first_trading_day=asset.start_date,
                    exchange=self.exchange_name,
                    symbols=asset.symbol,
                    symbol_list=asset.symbol,
                    data_frequency=data_frequency,
                    start_dt=asset_start_dt,
                    end_dt=end_dt
                )

            periods = self.get_calendar_periods_range(
                asset_start_dt, end_dt, data_frequency
            )
            # This does not behave well when requesting multiple assets
            # when the start or end date of one asset is outside of the range
            # looking at the logic in load_raw_arrays(), we are not achieving
            # any performance gain by requesting multiple sids at once. It's
            # looping through the sids and making separate requests anyway.
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=[field],
                start_dt=start_dt,
                end_dt=end_dt
            )
            if len(arrays) == 0:
                raise DataCorruptionError(
                    exchange=self.exchange_name,
                    symbols=asset.symbol,
                    start_dt=asset_start_dt,
                    end_dt=end_dt
                )

            # One field and one sid were requested: take the first array
            # and its first column.
            # NOTE(review): assumes arrays are (periods, sids) - confirm.
            field_values = arrays[0][:, 0]

            try:
                value_series = pd.Series(field_values, index=periods)
                series[asset] = value_series
            except ValueError as e:
                raise PricingDataValueError(
                    exchange=asset.exchange,
                    symbol=asset.symbol,
                    start_dt=asset_start_dt,
                    end_dt=end_dt,
                    error=e
                )

        return series
Exemple #6
0
    def get_history_window_series_and_load(self,
                                           assets,
                                           end_dt,
                                           bar_count,
                                           field,
                                           data_frequency,
                                           algo_end_dt=None,
                                           force_auto_ingest=False
                                           ):
        """
        Retrieve price data history, ingest missing data.

        The window is first read from the bundle. If that fails because
        the data is not loaded and auto-ingestion is enabled (globally
        through AUTO_INGEST or for this call through `force_auto_ingest`),
        the assets are ingested and the window is read again with a fresh
        reader.

        Parameters
        ----------
        assets: list[TradingPair]
        end_dt: pd.Timestamp
        bar_count: int
        field: str
        data_frequency: str
        algo_end_dt: pd.Timestamp
            End date handed to `ingest_assets` when data has to be
            ingested.
        force_auto_ingest: bool
            Ingest missing data even when AUTO_INGEST is off.

        Returns
        -------
        DataFrame
            One column per asset, on every code path.

        Raises
        ------
        PricingDataNotLoadedError
            If the data is not in the bundle and auto-ingestion is
            disabled, or if it is still missing after ingestion.

        """
        window_kwargs = dict(
            assets=assets,
            end_dt=end_dt,
            bar_count=bar_count,
            field=field,
            data_frequency=data_frequency,
        )
        try:
            series = self.get_history_window_series(**window_kwargs)

        except PricingDataNotLoadedError:
            if not (AUTO_INGEST or force_auto_ingest):
                raise

            start_dt = get_start_dt(end_dt, bar_count, data_frequency)
            log.info(
                'pricing data for {symbol} not found in range '
                '{start} to {end}, updating the bundles.'.format(
                    symbol=[asset.symbol for asset in assets],
                    start=start_dt,
                    end=end_dt
                )
            )
            self.ingest_assets(
                assets=assets,
                start_dt=start_dt,
                end_dt=algo_end_dt,  # TODO: apply trailing bars
                data_frequency=data_frequency,
                show_progress=True,
                show_breakdown=True
            )
            # The reader may be cached from before the ingestion; ask for
            # a new one so the freshly written data is visible.
            series = self.get_history_window_series(
                reset_reader=True, **window_kwargs
            )

        return pd.DataFrame(series)
Exemple #7
0
    def get_exchange_history_window(self,
                                    exchange_name,
                                    assets,
                                    end_dt,
                                    bar_count,
                                    frequency,
                                    field,
                                    data_frequency,
                                    ffill=True):
        """
        Load a history window from the bundle registered for an exchange
        and pass it through `resample_history_df`.

        With a 'minute' `data_frequency` the window ends one minute before
        `end_dt`, and daily candles are built from minute bars (1440 per
        day) rather than from the daily bundle.

        Parameters
        ----------
        exchange_name: str
            Key used to look up the bundle in `self.exchange_bundles`.
        assets: list[TradingPair]
        end_dt: datetime
        bar_count: int
            Number of candles requested; multiplied by the candle size
            returned by `get_frequency` before querying the bundle.
        frequency: str
        field: str
        data_frequency: str
        ffill: bool
            Not used by this method.

        Returns
        -------
        DataFrame

        """
        # TODO: verify that the exchange supports the timeframe
        bundle = self.exchange_bundles[exchange_name]  # type: ExchangeBundle

        freq, candle_size, _unit, bundle_frequency = get_frequency(
            frequency, data_frequency, supported_freqs=['T', 'D'])
        total_bars = candle_size * bar_count

        # Any frequency other than "minute" (i.e. "daily") reads up to
        # end_dt itself.
        series_end_dt = end_dt
        if data_frequency == "minute":
            # for minute frequency always request data until the
            # current minute (do not include the current minute)
            series_end_dt = end_dt - datetime.timedelta(minutes=1)

            # read the minute bundles for daily frequency to
            # support last partial candle
            # TODO: optimize this by applying this logic only for the last day
            if bundle_frequency == 'daily':
                bundle_frequency = 'minute'
                total_bars *= 1440

        window = bundle.get_history_window_series_and_load(
            assets=assets,
            end_dt=series_end_dt,
            bar_count=total_bars,
            field=field,
            data_frequency=bundle_frequency,
            algo_end_dt=self._last_available_session,
        )

        window_start = get_start_dt(
            series_end_dt, total_bars, bundle_frequency, False)
        return resample_history_df(
            pd.DataFrame(window), freq, field, window_start)
Exemple #8
0
    def get_history_window_with_bundle(self,
                                       assets,
                                       end_dt,
                                       bar_count,
                                       frequency,
                                       field,
                                       data_frequency=None,
                                       ffill=True,
                                       force_auto_ingest=False):
        """
        Return a history window read from the bundle, extended with
        candles fetched directly from the exchange for every asset whose
        bundled data is missing or stops before `end_dt`.

        Parameters
        ----------
        assets : list[TradingPair]
            The assets whose data is desired.

        end_dt: datetime
            The date of the last bar.

        bar_count: int
            The number of bars desired.

        frequency: string
            "1d" or "1m"

        field: string
            The desired field of the asset.

        data_frequency: string
            The frequency of the data to query; i.e. whether the data is
            'daily' or 'minute' bars.

        ffill: boolean
            Currently not used by this method.
            # TODO: fill how?

        force_auto_ingest: boolean
            Forwarded to the bundle's
            `get_history_window_series_and_load`.

        Returns
        -------
        DataFrame
            The output of `resample_history_df`, with rows containing
            missing values dropped.

        """
        # TODO: this function needs some work,
        # we're currently using it just for benchmark data
        freq, candle_size, _, data_frequency = get_frequency(
            frequency, data_frequency
        )
        adj_bar_count = candle_size * bar_count
        try:
            loaded = self.bundle.get_history_window_series_and_load(
                assets=assets,
                end_dt=end_dt,
                bar_count=adj_bar_count,
                field=field,
                data_frequency=data_frequency,
                force_auto_ingest=force_auto_ingest
            )

        except (PricingDataNotLoadedError, NoDataAvailableOnExchange):
            loaded = dict()

        # The bundle may hand back a dict of Series or a DataFrame. Work on
        # a plain dict so that an extended (longer) series can replace an
        # entry without being re-aligned to a pre-existing index.
        series = {key: loaded[key] for key in loaded}

        for asset in assets:
            bundled = series.get(asset)
            has_bundled = bundled is not None and len(bundled) > 0
            if has_bundled and bundled.index[-1] >= end_dt:
                continue

            # Adding bars too recent to be contained in the consolidated
            # exchanges bundles. We go directly against the exchange
            # to retrieve the candles.
            if has_bundled:
                # Resume right after the last bundled bar and remember its
                # value so that leading gaps can be filled from it.
                trailing_dt = \
                    bundled.index[-1] + get_delta(1, data_frequency)
                last_value = bundled.iloc[-1]
            else:
                trailing_dt = get_start_dt(
                    end_dt, adj_bar_count, data_frequency)
                last_value = np.nan

            # The get_history method supports multiple asset
            # Use the original frequency to let each api optimize
            # the size of result sets
            trailing_bars = get_periods(
                trailing_dt, end_dt, freq
            )
            candles = self.get_candles(
                freq=freq,
                assets=asset,
                end_dt=end_dt,
                # Never request more than 500 bars in one call.
                bar_count=min(trailing_bars, 500),
            )

            # Create a series with the common data_frequency, ffill
            # missing values
            candle_series = self.get_series_from_candles(
                candles=candles,
                start_dt=trailing_dt,
                end_dt=end_dt,
                data_frequency=data_frequency,
                field=field,
                previous_value=last_value
            )

            if has_bundled:
                # Concatenation returns a new object; it has to be stored
                # back for the trailing bars to be part of the result.
                series[asset] = pd.concat([bundled, candle_series])
            else:
                series[asset] = candle_series

        df = resample_history_df(pd.DataFrame(series), freq, field)
        # TODO: consider this more carefully
        df.dropna(inplace=True)

        return df