예제 #1
0
    def _get_symbols_last_trade_value(self, symbols):
        """
        Fetch the latest trade for each symbol concurrently and
        return the results keyed by symbol.

        symbols: list[str]
        """
        @skip_http_error((404, 504))
        def get_one(symbol):
            # 404/504 responses are skipped instead of raised
            return self._api.get_latest_trade(symbol)

        return parallelize(get_one)(symbols)
예제 #2
0
    def can_trade(self, assets):
        """
        For the given asset or iterable of assets, returns true if all of the
        following are true:
        1) the asset is alive for the session of the current simulation time
          (if current simulation time is not a market minute, we use the next
          session)
        2) (if we are in minute mode) the asset's exchange is open at the
          current simulation time or at the simulation calendar's next market
          minute
        3) there is a known last price for the asset.

        Notes
        -----
        The second condition above warrants some further explanation.
        - If the asset's exchange calendar is identical to the simulation
        calendar, then this condition always returns True.
        - If there are market minutes in the simulation calendar outside of
        this asset's exchange's trading hours (for example, if the simulation
        is running on the CME calendar but the asset is MSFT, which trades on
        the NYSE), during those minutes, this condition will return false
        (for example, 3:15 am Eastern on a weekday, during which the CME is
        open but the NYSE is closed).

        Parameters
        ----------
        assets: Asset or iterable of assets

        Returns
        -------
        can_trade : bool or pd.Series[bool] indexed by asset.
        """
        dt = self.datetime

        # shift to the adjusted minute only when minute adjustment is on
        adjusted_dt = self._get_current_minute() if self._adjust_minutes else dt

        data_portal = self.data_portal

        # single asset: answer directly with a scalar bool
        if isinstance(assets, Asset):
            return self._can_trade_for_asset(
                assets, dt, adjusted_dt, data_portal)

        # iterable of assets: evaluate each in parallel and wrap in a Series
        def check(asset):
            return self._can_trade_for_asset(
                asset, dt, adjusted_dt, data_portal)

        results = parallelize(check)(assets)
        return pd.Series(data=results, index=assets, dtype=bool)
예제 #3
0
    def _symbol_trades(self, symbols):
        '''
        Retrieve the last trade for every symbol concurrently.

        symbols: list[str]

        return: dict[str -> polygon.Trade]
        '''
        @skip_http_error((404, 504))
        def last_trade_for(symbol):
            # 404/504 responses are skipped instead of raised
            return self._api.polygon.last_trade(symbol)

        return parallelize(last_trade_for)(symbols)
예제 #4
0
    def _symbol_bars(
            self,
            symbols,
            size,
            _from=None,
            to=None,
            limit=None):
        '''
        Query historic_agg either minute or day in parallel
        for multiple symbols, and return in dict.

        symbols: list[str]
        size:    str ('day', 'minute')
        _from:   str or pd.Timestamp
        to:      str or pd.Timestamp
        limit:   str or int

        return: dict[str -> pd.DataFrame]
        '''
        assert size in ('day', 'minute')

        # temp workaround for less bars after masking by
        # market hours
        query_limit = limit
        if query_limit is not None:
            query_limit *= 2

        @skip_http_error((404, 504))
        def fetch(symbol):
            df = self._api.polygon.historic_agg(
                size, symbol, _from, to, query_limit).df

            # zipline -> right label
            # API result -> left label (beginning of bucket)
            if size == 'minute':
                df.index += pd.Timedelta('1min')

                # guard against an empty result frame: df.index[0] /
                # df.index[-1] on an empty index raise IndexError
                # (same guard as the v2 variant of this method)
                if not df.empty:
                    # mask out bars outside market hours
                    mask = self._cal.minutes_in_range(
                        df.index[0], df.index[-1],
                    ).tz_convert(NY)
                    df = df.reindex(mask)

            if limit is not None:
                df = df.iloc[-limit:]
            return df

        return parallelize(fetch)(symbols)
예제 #5
0
    def _get_symbols_last_trade_value(self, symbols):
        """
        Concurrently look up the most recent trade for every symbol,
        routed through Polygon or Alpaca depending on configuration.

        symbols: list[str]

        return: dict[str -> polygon.Trade or alpaca.Trade]
        """
        @skip_http_error((404, 504))
        def last_trade(symbol):
            # pick the backend configured for this instance
            if not self._use_polygon:
                return self._api.get_last_trade(symbol)
            return self._api.polygon.last_trade(symbol)

        return parallelize(last_trade)(symbols)
예제 #6
0
    def _symbol_bars(
            self,
            symbols,
            size,
            _from=None,
            to=None,
            limit=None):
        '''
        Query historic_agg either minute or day in parallel
        for multiple symbols, and return in dict.

        symbols: list[str]
        size:    str ('day', 'minute')
        _from:   str or pd.Timestamp
        to:      str or pd.Timestamp
        limit:   str or int

        return: dict[str -> pd.DataFrame]
        '''
        assert size in ('day', 'minute')

        # neither endpoint given: anchor the window to "now" in New York
        if not (_from or to):
            to = pd.to_datetime('now', utc=True).tz_convert('America/New_York')

        # exactly one endpoint given plus a limit: derive the other
        # endpoint from the limit
        if not (_from and to) and limit:
            # temp workaround for less bars after masking by
            # market hours
            query_limit = limit
            if query_limit is not None:
                query_limit *= 2

            if _from:
                if size == 'day':
                    to = _from + timedelta(days=query_limit+1)
                else:
                    to = _from + timedelta(minutes=query_limit+1)
            else:
                if size == 'day':
                    _from = to - timedelta(days=query_limit+1)
                else:
                    _from = to - timedelta(minutes=query_limit+1)

        @skip_http_error((404, 504))
        def fetch(symbol):
            # v2 aggregates API takes epoch timestamps in milliseconds
            df = self._api.polygon.historic_agg_v2(
                symbol, 1, size,
                int(_from.timestamp()) * 1000,
                int(to.timestamp()) * 1000
            ).df

            # rename Polygon's v2 agg fields to match their full titles
            df = df.rename(index=str, columns={
                't': 'timestamp',
                'o': 'open',
                'h': 'high',
                'l': 'low',
                'c': 'close',
                'v': 'volume'
            })

            # convert timestamps to datetimes
            # astype is necessary to deal with empty result
            df.index = pd.to_datetime(
                df.index.astype('str'),
                utc=True,
            ).tz_convert('America/New_York')
            df.index.name = 'timestamp'

            # zipline -> right label
            # API result -> left label (beginning of bucket)
            if size == 'minute':
                df.index += pd.Timedelta('1min')

                if not df.empty:
                    # mask out bars outside market hours
                    mask = self._cal.minutes_in_range(
                        df.index[0], df.index[-1],
                    ).tz_convert(NY)
                    df = df.reindex(mask)

            # trim back down to the caller's requested bar count (the
            # query fetched 2x to survive the market-hours masking)
            if limit is not None:
                df = df.iloc[-limit:]
            return df

        return parallelize(fetch)(symbols)
예제 #7
0
    def _fetch_bars_from_api(self,
                             symbols,
                             size,
                             _from=None,
                             to=None,
                             limit=None):
        """
        Fetch minute or day bars for multiple symbols concurrently.

        Accepted argument combinations:
        1. _from + to
        2. to + limit
        3. limit only (the current time is used as `to`)

        symbols: list[str]
        size:    str ('day', 'minute')
        _from:   str or pd.Timestamp
        to:      str or pd.Timestamp
        limit:   str or int

        return: MultiIndex dataframe that looks like this:
                       AA                          GOOG
                       open high low close volume  open high low close volume
        DatetimeIndex:

        columns: level 0 equity name, level 1 OHLCV

        """
        assert size in ('day', 'minute')
        assert (_from and to) or limit

        # fill in the missing endpoint(s) from the limit
        if not (_from and to):
            _from, to = self._get_from_and_to(size, limit, end_dt=to)

        if self._use_polygon:
            # polygon: one request per symbol, fanned out concurrently
            requests = [{
                'symbols': symbol,
                '_from': _from,
                "to": to,
                "size": size
            } for symbol in symbols]
            fetched = parallelize(self._fetch_bars_from_api_internal)(requests)
            has_frames = [df for df in fetched.values()
                          if isinstance(df, pd.DataFrame)]
            if not has_frames:
                return pd.DataFrame([])
            return pd.concat(fetched.values(), axis=1)

        # alpaca supports real-time data for multiple stocks (<200)
        # per request, so batch the symbols before fanning out
        batches = [
            symbols[i:i + ALPACA_MAX_SYMBOLS_PER_REQUEST]
            for i in range(0, len(symbols), ALPACA_MAX_SYMBOLS_PER_REQUEST)
        ]
        requests = [{
            'symbols': batch,
            '_from': _from,
            "to": to,
            "size": size,
            "limit": limit
        } for batch in batches]
        fetched = parallelize_with_multi_process(
            self._fetch_bars_from_api_internal)(requests)

        return pd.concat(fetched, axis=1)
예제 #8
0
    def current(self, assets, fields):
        """
        Fetch the current value of each (asset, field) pair in parallel.

        Returns a scalar for a single asset and field, a Series for one
        iterable argument, and a DataFrame (columns per field, indexed
        by asset) when both arguments are iterable.
        """
        multiple_assets = _is_iterable(assets)
        multiple_fields = _is_iterable(fields)

        asset_list = assets if multiple_assets else [assets]
        field_list = fields if multiple_fields else [fields]

        # full cartesian product of assets x fields
        fetch_args = [(asset, field)
                      for asset in asset_list
                      for field in field_list]

        if not self._adjust_minutes:

            def fetch(asset, field):
                return self.data_portal.get_spot_value(
                    asset, field, self._get_current_minute(),
                    self.data_frequency)
        else:

            def fetch(asset, field):
                return self.data_portal.get_adjusted_value(
                    asset,
                    field,
                    self._get_current_minute(),
                    None,  # historically self.simulation_dt_func(); a
                    # zipline residue that is unused downstream, so an
                    # empty arg is passed instead
                    self.data_frequency)

        results = parallelize(fetch)(fetch_args)

        if not multiple_assets and not multiple_fields:
            # scalar asset + scalar field -> scalar value
            return results[(assets, fields)]

        if multiple_assets and multiple_fields:
            # DataFrame: one column per field, rows indexed by asset
            per_field = {field: {} for field in fields}
            for (asset, field), value in results.items():
                per_field[field][asset] = value
            columns = {
                field: pd.Series(data=per_field[field],
                                 index=assets,
                                 name=field)
                for field in fields
            }
            return pd.DataFrame(columns)

        if multiple_assets:
            # multiple assets, single field -> Series indexed by asset
            by_asset = {asset: value
                        for (asset, field), value in results.items()}
            return pd.Series(data=by_asset, index=assets, name=fields)

        # single asset, multiple fields -> Series indexed by field
        by_field = {field: value
                    for (asset, field), value in results.items()}
        return pd.Series(data=by_field,
                         index=fields,
                         name=assets.symbol)
예제 #9
0
    def _symbol_bars(self, symbols, size, _from=None, to=None, limit=None):
        """
        Query historic_agg_v2 either minute or day in parallel
        for multiple symbols, and return in dict.

        symbols: list[str]
        size:    str ('day', 'minute')
        _from:   str or pd.Timestamp
        to:      str or pd.Timestamp
        limit:   str or int

        return: dict[str -> pd.DataFrame]
        """
        assert size in ('day', 'minute')

        # neither endpoint given: anchor the window to "now" in New York
        if not (_from or to):
            to = pd.to_datetime('now', utc=True).tz_convert('America/New_York')

        # exactly one endpoint given plus a limit: derive the other
        # endpoint from the limit
        if not (_from and to) and limit:
            # temp workaround for less bars after masking by
            # market hours
            query_limit = limit
            if query_limit is not None:
                query_limit *= 2
            if _from:
                if size == 'day':
                    to = _from + timedelta(days=query_limit + 1)
                else:
                    to = _from + timedelta(minutes=query_limit + 1)
            else:
                if size == 'day':
                    _from = to - timedelta(days=query_limit + 1)
                else:
                    _from = to - timedelta(minutes=query_limit + 1)

        @skip_http_error((404, 504))
        def fetch(symbol):
            if self._use_polygon:
                # polygon v2 aggregates take epoch timestamps in ms
                df = self._api.polygon.historic_agg_v2(
                    symbol, 1, size,
                    int(_from.timestamp()) * 1000,
                    int(to.timestamp()) * 1000).df
            else:
                # alpaca barset endpoint takes ISO date strings; result
                # is keyed by symbol, so pull out this symbol's frame
                df = self._api.get_barset(symbol,
                                          size,
                                          start=_from.date().isoformat(),
                                          end=to.date().isoformat(),
                                          limit=limit).df[symbol]

            # zipline -> right label
            # API result -> left label (beginning of bucket)
            if size == 'minute':
                df.index += pd.Timedelta('1min')

                if not df.empty:
                    # mask out bars outside market hours
                    mask = self._cal.minutes_in_range(
                        df.index[0],
                        df.index[-1],
                    ).tz_convert(NY)
                    df = df.reindex(mask)

            # trim back down to the caller's requested bar count (the
            # query fetched 2x to survive the market-hours masking)
            if limit is not None:
                df = df.iloc[-limit:]
            return df

        return parallelize(fetch)(symbols)