Example #1
    def get_history_window_with_bundle(self,
                                       assets,
                                       end_dt,
                                       bar_count,
                                       frequency,
                                       field,
                                       data_frequency=None,
                                       ffill=True,
                                       force_auto_ingest=False):
        """
        Public API method that returns a dataframe containing the requested
        history window.  Data is fully adjusted.

        Parameters
        ----------
        assets : list[TradingPair]
            The assets whose data is desired.

        end_dt: datetime
            The date of the last bar.

        bar_count: int
            The number of bars desired.

        frequency: string
            "1d" or "1m"

        field: string
            The desired field of the asset.

        data_frequency: string
            The frequency of the data to query; i.e. whether the data is
            'daily' or 'minute' bars.

        # TODO: fill how?
        ffill: boolean
            Forward-fill missing values. Only has an effect if field
            is 'price'.

        Returns
        -------
        DataFrame
            A dataframe containing the requested data.

        """
        # TODO: this function needs some work,
        # we're currently using it just for benchmark data
        freq, candle_size, unit, data_frequency = get_frequency(
            frequency, data_frequency, supported_freqs=['T', 'D'])
        adj_bar_count = candle_size * bar_count
        try:
            series = self.bundle.get_history_window_series_and_load(
                assets=assets,
                end_dt=end_dt,
                bar_count=adj_bar_count,
                field=field,
                data_frequency=data_frequency,
                force_auto_ingest=force_auto_ingest)

        except (PricingDataNotLoadedError, NoDataAvailableOnExchange):
            series = dict()

        for asset in assets:
            if asset not in series or series[asset].index[-1] < end_dt:
                # Add bars too recent to be contained in the consolidated
                # exchange bundles. We go directly to the exchange
                # to retrieve the candles.
                start_dt = get_start_dt(end_dt, adj_bar_count, data_frequency)
                trailing_dt = \
                    series[asset].index[-1] + get_delta(1, data_frequency) \
                    if asset in series else start_dt

                # The get_history method supports multiple assets.
                # Use the original frequency to let each API optimize
                # the size of its result sets.
                trailing_bars = get_periods(trailing_dt, end_dt, freq)
                candles = self.get_candles(
                    freq=freq,
                    assets=asset,
                    end_dt=end_dt,
                    bar_count=trailing_bars if trailing_bars < 500 else 500,
                )

                last_value = series[asset].iloc[-1] if asset in series \
                    else np.nan

                # Create a series with the common data_frequency, ffill
                # missing values
                candle_series = self.get_series_from_candles(
                    candles=candles,
                    start_dt=trailing_dt,
                    end_dt=end_dt,
                    data_frequency=data_frequency,
                    field=field,
                    previous_value=last_value)

                if asset in series:
                    # Series.append returns a new Series, so reassign
                    series[asset] = series[asset].append(candle_series)

                else:
                    series[asset] = candle_series

        df = resample_history_df(pd.DataFrame(series), freq, field)
        # TODO: consider this more carefully
        df.dropna(inplace=True)

        return df
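
A minimal usage sketch of the method above. The setup is hypothetical: `exchange` stands for an object exposing get_history_window_with_bundle, and `asset` for a TradingPair obtained elsewhere (e.g. from the exchange's asset finder); only the keyword arguments come from the signature shown.

    import pandas as pd

    # Hypothetical call: the last 30 fully adjusted daily close prices for
    # one trading pair, auto-ingesting the bundle if it is not on disk yet.
    df = exchange.get_history_window_with_bundle(
        assets=[asset],
        end_dt=pd.Timestamp('2018-01-01', tz='UTC'),
        bar_count=30,
        frequency='1d',
        field='close',
        ffill=True,
        force_auto_ingest=True,
    )
    # The result is indexed by timestamp, with one column per requested asset.
    print(df.tail())
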
Example #2
    def get_history_window_with_bundle(self,
                                       assets,
                                       end_dt,
                                       bar_count,
                                       frequency,
                                       field,
                                       data_frequency=None,
                                       ffill=True,
                                       force_auto_ingest=False):

        """
        Public API method that returns a dataframe containing the requested
        history window.  Data is fully adjusted.

        Parameters
        ----------
        assets : list[TradingPair]
            The assets whose data is desired.

        end_dt: datetime
            The date of the last bar.

        bar_count: int
            The number of bars desired.

        frequency: string
            "1d" or "1m"

        field: string
            The desired field of the asset.

        data_frequency: string
            The frequency of the data to query; i.e. whether the data is
            'daily' or 'minute' bars.

        # TODO: fill how?
        ffill: boolean
            Forward-fill missing values. Only has an effect if field
            is 'price'.

        Returns
        -------
        DataFrame
            A dataframe containing the requested data.

        """
        # TODO: this function needs some work, we're currently using it just for benchmark data
        freq, candle_size, unit, data_frequency = get_frequency(
            frequency, data_frequency
        )
        adj_bar_count = candle_size * bar_count
        try:
            series = self.bundle.get_history_window_series_and_load(
                assets=assets,
                end_dt=end_dt,
                bar_count=adj_bar_count,
                field=field,
                data_frequency=data_frequency,
                force_auto_ingest=force_auto_ingest
            )

        except (PricingDataNotLoadedError, NoDataAvailableOnExchange):
            series = dict()

        for asset in assets:
            if asset not in series or series[asset].index[-1] < end_dt:
                # Add bars too recent to be contained in the consolidated
                # exchange bundles. We go directly to the exchange
                # to retrieve the candles.
                start_dt = get_start_dt(end_dt, adj_bar_count, data_frequency)
                trailing_dt = \
                    series[asset].index[-1] + get_delta(1, data_frequency) \
                    if asset in series else start_dt

                # The get_history method supports multiple assets.
                # Use the original frequency to let each API optimize
                # the size of its result sets.
                trailing_bars = get_periods(
                    trailing_dt, end_dt, freq
                )
                candles = self.get_candles(
                    freq=freq,
                    assets=asset,
                    end_dt=end_dt,
                    bar_count=trailing_bars if trailing_bars < 500 else 500,
                )

                last_value = series[asset].iloc[-1] if asset in series \
                    else np.nan

                # Create a series with the common data_frequency, ffill
                # missing values
                candle_series = self.get_series_from_candles(
                    candles=candles,
                    start_dt=trailing_dt,
                    end_dt=end_dt,
                    data_frequency=data_frequency,
                    field=field,
                    previous_value=last_value
                )

                if asset in series:
                    # Series.append returns a new Series, so reassign
                    series[asset] = series[asset].append(candle_series)

                else:
                    series[asset] = candle_series

        df = resample_history_df(pd.DataFrame(series), freq, field)
        # TODO: consider this more carefully
        df.dropna(inplace=True)

        return df
Example #3
    def get_history_window(self,
                           assets,
                           end_dt,
                           bar_count,
                           frequency,
                           field,
                           data_frequency=None,
                           is_current=False):
        """
        Public API method that returns a dataframe containing the requested
        history window.  Data is fully adjusted.

        Parameters
        ----------
        assets : list[TradingPair]
            The assets whose data is desired.

        end_dt: datetime
            The date of the last bar

        bar_count: int
            The number of bars desired.

        frequency: string
            "1d" or "1m"

        field: string
            The desired field of the asset.

        data_frequency: string
            The frequency of the data to query; i.e. whether the data is
            'daily' or 'minute' bars.

        is_current: bool
            Skip date filters when current data is requested (last few bars
            until now).

        Notes
        -----
        Catalyst requires an end date with a bar count, while CCXT wants a
        start date with a bar count. Since we have to make calculations here,
        we ensure that the last candle matches the end_dt parameter.

        Returns
        -------
        DataFrame
            A dataframe containing the requested data.

        """
        freq, candle_size, unit, data_frequency = get_frequency(
            frequency, data_frequency)
        # The get_history method supports multiple assets
        candles = self.get_candles(
            freq=freq,
            assets=assets,
            bar_count=bar_count,
            end_dt=end_dt if not is_current else None,
        )

        series = dict()
        for asset in candles:
            first_candle = candles[asset][0]
            asset_series = self.get_series_from_candles(
                candles=candles[asset],
                start_dt=first_candle['last_traded'],
                end_dt=end_dt,
                data_frequency=frequency,
                field=field,
            )

            # Checking to make sure that the dates match
            delta = get_delta(candle_size, data_frequency)
            adj_end_dt = end_dt - delta
            last_traded = asset_series.index[-1]

            if last_traded < adj_end_dt:
                raise LastCandleTooEarlyError(
                    last_traded=last_traded,
                    end_dt=adj_end_dt,
                    exchange=self.name,
                )

            series[asset] = asset_series

        df = pd.DataFrame(series)
        df.dropna(inplace=True)

        return df
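
A minimal usage sketch along the same lines (same hypothetical `exchange` and `asset` objects as above). Per the Notes section, the method raises LastCandleTooEarlyError when the exchange cannot supply a candle lining up with end_dt.

    import pandas as pd

    # Hypothetical call: the last 60 one-minute closes ending at end_dt.
    # Passing is_current=True instead skips the end_dt filter and returns
    # the most recent bars available from the exchange.
    df = exchange.get_history_window(
        assets=[asset],
        end_dt=pd.Timestamp('2018-01-01 12:00', tz='UTC'),
        bar_count=60,
        frequency='1m',
        field='close',
    )
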
Example #4
    def get_history_window(self,
                           assets,
                           end_dt,
                           bar_count,
                           frequency,
                           field,
                           data_frequency=None,
                           is_current=False):

        """
        Public API method that returns a dataframe containing the requested
        history window.  Data is fully adjusted.

        Parameters
        ----------
        assets : list[TradingPair]
            The assets whose data is desired.

        end_dt: datetime
            The date of the last bar

        bar_count: int
            The number of bars desired.

        frequency: string
            "1d" or "1m"

        field: string
            The desired field of the asset.

        data_frequency: string
            The frequency of the data to query; i.e. whether the data is
            'daily' or 'minute' bars.

        is_current: bool
            Skip date filters when current data is requested (last few bars
            until now).

        Notes
        -----
        Catalyst requires an end date with a bar count, while CCXT wants a
        start date with a bar count. Since we have to make calculations here,
        we ensure that the last candle matches the end_dt parameter.

        Returns
        -------
        DataFrame
            A dataframe containing the requested data.

        """
        freq, candle_size, unit, data_frequency = get_frequency(
            frequency, data_frequency
        )
        # The get_history method supports multiple assets
        candles = self.get_candles(
            freq=freq,
            assets=assets,
            bar_count=bar_count,
            end_dt=end_dt if not is_current else None,
        )

        series = dict()
        for asset in candles:
            first_candle = candles[asset][0]
            asset_series = self.get_series_from_candles(
                candles=candles[asset],
                start_dt=first_candle['last_traded'],
                end_dt=end_dt,
                data_frequency=frequency,
                field=field,
            )

            # Checking to make sure that the dates match
            delta = get_delta(candle_size, data_frequency)
            adj_end_dt = end_dt - delta
            last_traded = asset_series.index[-1]

            if last_traded < adj_end_dt:
                raise LastCandleTooEarlyError(
                    last_traded=last_traded,
                    end_dt=adj_end_dt,
                    exchange=self.name,
                )

            series[asset] = asset_series

        df = pd.DataFrame(series)
        df.dropna(inplace=True)

        return df
Example #5
    def get_history_window_series(self,
                                  assets,
                                  end_dt,
                                  bar_count,
                                  field,
                                  data_frequency,
                                  trailing_bar_count=None,
                                  reset_reader=False):
        if trailing_bar_count:
            delta = get_delta(trailing_bar_count, data_frequency)
            end_dt += delta

        start_dt = get_start_dt(end_dt, bar_count, data_frequency, False)
        start_dt, _ = self.get_adj_dates(
            start_dt, end_dt, assets, data_frequency
        )

        # This is an attempt to resolve some caching issues with the reader
        # when auto-ingesting data.
        # TODO: needs more work
        reader = self.get_reader(data_frequency)
        if reset_reader:
            del self._readers[reader._rootdir]
            reader = self.get_reader(data_frequency)

        if reader is None:
            symbols = [asset.symbol for asset in assets]
            raise PricingDataNotLoadedError(
                field=field,
                first_trading_day=min([asset.start_date for asset in assets]),
                exchange=self.exchange_name,
                symbols=symbols,
                symbol_list=','.join(symbols),
                data_frequency=data_frequency,
                start_dt=start_dt,
                end_dt=end_dt
            )

        series = dict()
        for asset in assets:
            asset_start_dt, _ = self.get_adj_dates(
                start_dt, end_dt, assets, data_frequency
            )
            in_bundle = range_in_bundle(
                asset, asset_start_dt, end_dt, reader
            )
            if not in_bundle:
                raise PricingDataNotLoadedError(
                    field=field,
                    first_trading_day=asset.start_date,
                    exchange=self.exchange_name,
                    symbols=asset.symbol,
                    symbol_list=asset.symbol,
                    data_frequency=data_frequency,
                    start_dt=asset_start_dt,
                    end_dt=end_dt
                )

            periods = self.get_calendar_periods_range(
                asset_start_dt, end_dt, data_frequency
            )
            # This does not behave well when requesting multiple assets
            # and the start or end date of one asset is outside of the range.
            # Looking at the logic in load_raw_arrays(), we are not achieving
            # any performance gain by requesting multiple sids at once: it
            # loops through the sids and makes separate requests anyway.
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=[field],
                start_dt=start_dt,
                end_dt=end_dt
            )
            if len(arrays) == 0:
                raise DataCorruptionError(
                    exchange=self.exchange_name,
                    symbols=asset.symbol,
                    start_dt=asset_start_dt,
                    end_dt=end_dt
                )

            field_values = arrays[0][:, 0]

            try:
                value_series = pd.Series(field_values, index=periods)
                series[asset] = value_series
            except ValueError as e:
                raise PricingDataValueError(
                    exchange=asset.exchange,
                    symbol=asset.symbol,
                    start_dt=asset_start_dt,
                    end_dt=end_dt,
                    error=e
                )

        return series
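
A minimal usage sketch for this lower-level helper. The setup is hypothetical: `bundle` stands for the object exposing get_history_window_series (the same role as self.bundle in Examples #1 and #2), and `assets` for a list of TradingPair objects whose data has already been ingested.

    import pandas as pd

    # Hypothetical call: build one price series per asset straight from the
    # bundle reader, recreating the cached reader first via reset_reader.
    series = bundle.get_history_window_series(
        assets=assets,
        end_dt=pd.Timestamp('2018-01-01', tz='UTC'),
        bar_count=90,
        field='close',
        data_frequency='daily',
        reset_reader=True,
    )
    # The result is a dict keyed by asset; each value is a pd.Series indexed
    # by the calendar periods covering the request, ready for pd.DataFrame().
    df = pd.DataFrame(series)
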