def _get_data_for_multiple_tickers(self, tickers, fields, start_date, end_date, use_prices_types):
    """
    Fetches data for tickers that may belong to different data providers and merges
    the partial results into one normalized (possibly squeezed) structure.

    When use_prices_types is True the providers are queried through get_price (fields
    are PriceField objects), otherwise through get_history (fields are strings).
    """
    field_type = PriceField if use_prices_types else str

    tickers, got_single_ticker = convert_to_list(tickers, Ticker)
    fields, got_single_field = convert_to_list(fields, field_type)
    got_single_date = start_date is not None and start_date == end_date

    def fetch(provider: DataProvider, provider_tickers):
        # Dispatch to the price- or history-oriented API of the provider
        if use_prices_types:
            return provider.get_price(provider_tickers, fields, start_date, end_date)
        return provider.get_history(provider_tickers, fields, start_date, end_date)

    partial_results = []
    # groupby only merges consecutive tickers of the same class, so a provider may be
    # queried more than once; the concat below makes that harmless
    for ticker_class, ticker_group in groupby(tickers, lambda t: type(t)):
        provider = self._identify_data_provider(ticker_class)
        partial = fetch(provider, list(ticker_group))
        if partial is not None:
            partial_results.append(partial)

    combined = QFDataArray.concat(partial_results, dim=TICKERS)
    return normalize_data_array(combined, tickers, fields, got_single_date,
                                got_single_ticker, got_single_field, use_prices_types)
def get_history(
        self, tickers: Union[CcyTicker, Sequence[CcyTicker]],
        fields: Union[None, str, Sequence[str]] = None,
        start_date: datetime = None, end_date: datetime = None, **kwargs) \
        -> Union[QFSeries, QFDataFrame, QFDataArray]:
    """
    Downloads historical data for the given currency tickers and returns it as a
    normalized, possibly squeezed, structure (QFSeries / QFDataFrame / QFDataArray).
    """
    tickers, got_single_ticker = convert_to_list(tickers, CcyTicker)
    got_single_date = start_date is not None and start_date == end_date

    if fields is None:
        # All existing fields will be present in the result
        got_single_field = False
    else:
        fields, got_single_field = convert_to_list(fields, (PriceField, str))

    # One HTTP session is shared across all per-ticker requests
    with Session() as session:
        tickers_data_dict = {
            ticker: self._get_single_ticker(ticker, fields, start_date, end_date, session)
            for ticker in tickers
        }

    if fields is None:
        fields = get_fields_from_tickers_data_dict(tickers_data_dict)

    data_array = tickers_dict_to_data_array(tickers_data_dict, tickers, fields)
    return normalize_data_array(data_array, tickers, fields, got_single_date,
                                got_single_ticker, got_single_field)
def get_history(
        self, tickers: Union[BloombergTicker, Sequence[BloombergTicker]],
        fields: Union[str, Sequence[str]], start_date: datetime, end_date: datetime = None,
        frequency: Frequency = Frequency.DAILY, currency: str = None,
        override_name: str = None, override_value: str = None) \
        -> Union[QFSeries, QFDataFrame, QFDataArray]:
    """
    Downloads historical data from Bloomberg for the given tickers and fields and
    returns it in a normalized, possibly squeezed, container.

    Raises
    ------
    ValueError
        when fields is None (all-fields queries are not supported here)
    """
    if fields is None:
        raise ValueError("Fields being None is not supported by {}".format(self.__class__.__name__))

    self._connect_if_needed()
    self._assert_is_connected()

    # Evaluated before end_date is defaulted, so a missing end_date never counts as a single date
    got_single_date = start_date is not None and start_date == end_date

    tickers, got_single_ticker = convert_to_list(tickers, BloombergTicker)
    fields, got_single_field = convert_to_list(fields, (PriceField, str))

    if end_date is None:
        end_date = datetime.now()

    data_array = self._historical_data_provider.get(
        tickers_as_strings(tickers), fields, start_date, end_date, frequency,
        currency, override_name, override_value)

    return normalize_data_array(data_array, tickers, fields, got_single_date,
                                got_single_ticker, got_single_field)
def get_history(self, tickers: Union[Ticker, Sequence[Ticker]], fields: Union[str, Sequence[str]],
                start_date: datetime, end_date: datetime = None,
                frequency: Frequency = Frequency.DAILY, **kwargs
                ) -> Union[QFSeries, QFDataFrame, QFDataArray]:
    """
    Returns historical fields for the given tickers, sliced out of the preloaded
    data bundle and normalized (squeezed to the lowest possible dimensionality).

    Parameters
    ----------
    tickers: Ticker, Sequence[Ticker]
        tickers (including FutureTickers) for which data should be returned
    fields: str, Sequence[str]
        fields which should be returned
    start_date: datetime
        beginning of the historical period
    end_date: datetime
        end of the historical period; defaults to the current time
    frequency: Frequency
        must equal the frequency of the preloaded data - no resampling is performed
    """
    # Verify whether the passed frequency parameter is correct and can be used with the preset data
    assert frequency == self._frequency, "Currently, get_history does not support data resampling; " \
                                         "the requested frequency must equal the data frequency"

    if end_date is None:
        end_date = datetime.now()

    if frequency > Frequency.DAILY:
        # In case of high frequency - the data array should not include the end_date. The data range is
        # labeled with the beginning index time and excludes the end of the time range, therefore a new
        # end date is computed.
        end_date = end_date - Frequency.MIN_1.time_delta()

    # In order to be able to return data for FutureTickers create a mapping between tickers and
    # corresponding specific tickers (in case of non FutureTickers it will be an identity mapping)
    tickers, got_single_ticker = convert_to_list(tickers, Ticker)
    tickers_mapping = {
        (t.get_current_specific_ticker() if isinstance(t, FutureTicker) else t): t for t in tickers
    }
    specific_tickers = list(tickers_mapping.keys())

    fields, got_single_field = convert_to_list(fields, str)
    # For intraday data a "single date" means the range covers exactly one bar
    got_single_date = start_date is not None and (
        (start_date == end_date) if frequency <= Frequency.DAILY else
        (start_date + frequency.time_delta() >= end_date)
    )

    self._check_if_cached_data_available(specific_tickers, fields, start_date, end_date)
    data_array = self._data_bundle.loc[start_date:end_date, specific_tickers, fields]

    normalized_result = normalize_data_array(data_array, specific_tickers, fields, got_single_date,
                                             got_single_ticker, got_single_field, use_prices_types=False)

    # Map the specific tickers onto the tickers given by the tickers_mapping array.
    # NOTE(review): with use_prices_types=False the result is presumably a QFDataFrame/QFSeries, yet
    # the checks below look for the Prices* subclasses - verify they ever match here. Also the sibling
    # get_price renames the series with ticker.ticker while this uses ticker.name - confirm intended.
    if isinstance(normalized_result, QFDataArray):
        normalized_result = normalized_result.assign_coords(
            tickers=[tickers_mapping[t] for t in normalized_result.tickers.values])
    elif isinstance(normalized_result, PricesDataFrame):
        normalized_result = normalized_result.rename(columns=tickers_mapping)
    elif isinstance(normalized_result, PricesSeries):
        # Name of the PricesSeries can only contain strings
        ticker = tickers[0]
        normalized_result = normalized_result.rename(ticker.name)

    return normalized_result
def get_price(
        self, tickers: Union[Ticker, Sequence[Ticker]],
        fields: Union[PriceField, Sequence[PriceField]],
        start_date: datetime, end_date: datetime = None) \
        -> Union[None, PricesSeries, PricesDataFrame, QFDataArray]:
    """
    Slices the requested prices out of the preloaded data bundle and returns them
    normalized (squeezed to the lowest possible dimensionality).
    """
    tickers, got_single_ticker = convert_to_list(tickers, Ticker)
    fields, got_single_field = convert_to_list(fields, PriceField)
    got_single_date = start_date is not None and start_date == end_date

    if self._check_data_availability:
        # Verify that the requested tickers/fields/date range lie inside the cached bundle
        self._check_if_cached_data_available(tickers, fields, start_date, end_date)

    selection = self._data_bundle.loc[start_date:end_date, tickers, fields]
    return normalize_data_array(selection, tickers, fields, got_single_date,
                                got_single_ticker, got_single_field, use_prices_types=True)
def _get_history(
        self, convert_to_prices_types: bool, tickers: Union[QuandlTicker, Sequence[QuandlTicker]],
        fields: Union[None, str, Sequence[str], PriceField, Sequence[PriceField]] = None,
        start_date: datetime = None, end_date: datetime = None) -> \
        Union[QFSeries, QFDataFrame, QFDataArray]:
    """
    Downloads historical data from Quandl and returns it normalized.

    NOTE: Only use one Quandl Database at the time. Do not mix multiple databases in one
    query - this is the natural limitation coming from the fact that column names (fields)
    are different across databases.
    """
    tickers, got_single_ticker = convert_to_list(tickers, QuandlTicker)
    got_single_date = start_date is not None and start_date == end_date

    if fields is None:
        # All existing fields will be present in the result
        got_single_field = False
    else:
        fields, got_single_field = convert_to_list(fields, (PriceField, str))

    # Query each database separately; groupby merges only consecutive tickers that
    # share a database name
    tickers_to_data = {}
    for db_name, grouped_tickers in groupby(tickers, lambda t: t.database_name):
        tickers_to_data.update(self._get_result_for_single_database(
            convert_to_prices_types, list(grouped_tickers), fields, start_date, end_date))

    if fields is None:
        fields = get_fields_from_tickers_data_dict(tickers_to_data)

    data_array = tickers_dict_to_data_array(tickers_to_data, tickers, fields)
    return normalize_data_array(data_array, tickers, fields, got_single_date, got_single_ticker,
                                got_single_field, use_prices_types=convert_to_prices_types)
def get_price(tickers, fields, start_date, end_date, _): prices_bar = [5.0, 10.0, 1.0, 4.0, 50] # Open, High, Low, Close, Volume dates_index = pd.date_range(start_date, end_date, freq='B') tickers, got_single_ticker = convert_to_list(tickers, Ticker) fields, got_single_field = convert_to_list(fields, PriceField) got_single_date = len(dates_index) == 1 prices_df = pd.DataFrame( index=pd.Index(dates_index, name=TICKERS), columns=pd.Index(PriceField.ohlcv(), name=FIELDS), data=[prices_bar] * len(dates_index) ) data_array = tickers_dict_to_data_array({ ticker: prices_df for ticker in self.tickers }, self.tickers, PriceField.ohlcv()) return normalize_data_array(data_array.loc[start_date:end_date, tickers, fields], tickers, fields, got_single_date, got_single_ticker, got_single_field)
def __init__(self, path: str, tickers: Union[Ticker, Sequence[Ticker]],
             fields: Union[PriceField, List[PriceField]], start_date: datetime, end_date: datetime,
             frequency: Frequency):
    """
    Loads price bars and futures-contract details from the given path and initializes
    the provider with the normalized data array.

    Raises
    ------
    NotImplementedError
        for any frequency other than DAILY or MIN_1
    """
    self.logger = qf_logger.getChild(self.__class__.__name__)

    if frequency not in [Frequency.DAILY, Frequency.MIN_1]:
        raise NotImplementedError("{} supports only DAILY and MIN_1 bars loading".format(self.__class__.__name__))

    fields, _ = convert_to_list(fields, PriceField)
    tickers, _ = convert_to_list(tickers, Ticker)
    tickers = list(dict.fromkeys(tickers))  # drop duplicates while keeping order

    # Partition the tickers into futures and plain ones
    future_tickers = []
    non_future_tickers = []
    for ticker in tickers:
        (future_tickers if isinstance(ticker, FutureTicker) else non_future_tickers).append(ticker)

    exp_dates = None
    all_tickers = non_future_tickers
    if future_tickers:
        exp_dates = self._get_expiration_dates(path, future_tickers)
        # Expand every FutureTicker into its specific contracts, skipping those
        # which expired before start_date
        for future_ticker in future_tickers:
            all_tickers.extend(chain_tickers_within_range(future_ticker, exp_dates[future_ticker],
                                                          start_date, end_date))

    data_array, contracts_df = self._get_price_and_contracts(path, all_tickers, fields,
                                                             start_date, end_date, frequency)
    normalized_data_array = normalize_data_array(data_array, all_tickers, fields, False, False, False)
    self._contracts_df = contracts_df

    super().__init__(data=normalized_data_array, exp_dates=exp_dates, start_date=start_date,
                     end_date=end_date, frequency=frequency)
def get_history(self, tickers: Union[BloombergTicker, Sequence[BloombergTicker]],
                fields: Union[str, Sequence[str]], start_date: datetime, end_date: datetime = None,
                frequency: Frequency = Frequency.DAILY, currency: str = None,
                override_name: str = None, override_value: str = None) \
        -> Union[QFSeries, QFDataFrame, QFDataArray]:
    """
    Gets historical data from Bloomberg from the (start_date - end_date) time range. For
    frequencies higher than daily (intraday data), each bar is indexed with the beginning of
    its time range, e.g. the 8:00 - 8:01 bar at 1 minute frequency is indexed with 8:00.

    Parameters
    ----------
    tickers: Ticker, Sequence[Ticker]
        tickers for securities which should be retrieved
    fields: None, str, Sequence[str]
        fields of securities which should be retrieved. If None, all available fields will be
        returned (only supported by few DataProviders)
    start_date: datetime
        date representing the beginning of historical period from which data should be retrieved
    end_date: datetime
        date representing the end of historical period from which data should be retrieved; if no
        end_date was provided, by default the current date will be used
    frequency: Frequency
        frequency of the data
    currency: str
    override_name: str
    override_value: str

    Returns
    -------
    QFSeries, QFDataFrame, QFDataArray
        If possible the result will be squeezed so that, instead of a QFDataArray, data of lower
        dimensionality is returned: either a QFDataArray (3 dimensions: date, ticker, field), a
        QFDataFrame (2 dimensions: date, ticker or field; ticker and field if a single date was
        provided) or a QFSeries (1 dimension: date). If no data is available in the database or a
        non existing ticker was provided, an empty structure (QFSeries, QFDataFrame or QFDataArray)
        will be returned.
    """
    if fields is None:
        raise ValueError("Fields being None is not supported by {}".format(self.__class__.__name__))

    self._connect_if_needed()
    self._assert_is_connected()

    if end_date is None:
        end_date = datetime.now()

    # A single calendar day only counts as a "single date" for daily data
    got_single_date = frequency == Frequency.DAILY and start_date is not None \
        and start_date.date() == end_date.date()

    tickers, got_single_ticker = convert_to_list(tickers, BloombergTicker)
    fields, got_single_field = convert_to_list(fields, (PriceField, str))

    # Future tickers resolve to their currently active specific contract; the mapping
    # is used afterwards to label the result with the originally requested tickers
    def _specific(t: BloombergTicker):
        return t.get_current_specific_ticker() if isinstance(t, BloombergFutureTicker) else t

    tickers_mapping = {_specific(t): t for t in tickers}

    data_array = self._historical_data_provider.get(
        tickers, fields, start_date, end_date, frequency, currency, override_name, override_value)
    data_array = data_array.assign_coords(
        tickers=[tickers_mapping.get(t, t) for t in data_array.tickers.values])

    return normalize_data_array(data_array, tickers, fields, got_single_date,
                                got_single_ticker, got_single_field)
def get_price(self, tickers: Union[Ticker, Sequence[Ticker]],
              fields: Union[PriceField, Sequence[PriceField]],
              start_date: datetime, end_date: datetime = None,
              frequency: Frequency = Frequency.DAILY) -> \
        Union[None, PricesSeries, PricesDataFrame, QFDataArray]:
    """
    Returns prices for the given tickers, sliced out of the preloaded data bundle,
    optionally aggregated down to a lower intraday frequency, and normalized
    (squeezed to the lowest possible dimensionality).

    Parameters
    ----------
    tickers: Ticker, Sequence[Ticker]
        tickers (including FutureTickers) for which prices should be returned
    fields: PriceField, Sequence[PriceField]
        price fields which should be returned
    start_date: datetime
        beginning of the historical period
    end_date: datetime
        end of the historical period; defaults to the current time
    frequency: Frequency
        desired frequency; must be at most the frequency of the preloaded data
    """
    # Frequency adjustment - Verify whether the passed frequency parameter is correct and can be
    # used with the preset data. The passed desired data frequency should be at most equal to the
    # frequency of the initially loaded data (in case of downsampling the data may be aggregated,
    # but no data upsampling is supported).
    assert frequency <= self._frequency, "The passed data frequency should be at most equal to the frequency of " \
                                         "the initially loaded data"
    # The PresetDataProvider does not support data aggregation for frequency lower than daily frequency
    if frequency < self._frequency and frequency <= Frequency.DAILY:
        raise NotImplementedError("Data aggregation for lower than daily frequency is not supported yet")

    if end_date is None:
        end_date = datetime.now()
    if frequency > Frequency.DAILY:
        # In case of high, intraday frequency - the data array should not include the end_date.
        # The data range is labeled with the beginning index time and excludes the end of the time
        # range, therefore a new end date is computed.
        end_date = end_date - Frequency.MIN_1.time_delta()

    # Prearrange all the necessary parameters. In order to be able to return data for FutureTickers
    # create a mapping between tickers and corresponding specific tickers (in case of non
    # FutureTickers it will be an identity mapping)
    tickers, got_single_ticker = convert_to_list(tickers, Ticker)
    tickers_mapping = {
        (t.get_current_specific_ticker() if isinstance(t, FutureTicker) else t): t for t in tickers
    }
    specific_tickers = list(tickers_mapping.keys())

    fields, got_single_field = convert_to_list(fields, PriceField)
    # Intraday ranges never qualify as a single date
    got_single_date = False if frequency > Frequency.DAILY else (
        bool(start_date and (start_date == end_date))
    )

    self._check_if_cached_data_available(specific_tickers, fields, start_date, end_date)
    data_array = self._data_bundle.loc[start_date:end_date, specific_tickers, fields]

    # Data aggregation - allowed only for intraday data and only when the slice is non-empty
    if frequency < self._frequency and len(data_array[DATES]) > 0:
        data_array = self._aggregate_intraday_data(data_array, start_date, end_date,
                                                   specific_tickers, fields, frequency)

    normalized_result = normalize_data_array(
        data_array, specific_tickers, fields, got_single_date, got_single_ticker,
        got_single_field, use_prices_types=True
    )

    # Map the specific tickers onto the tickers given by the tickers_mapping array
    if isinstance(normalized_result, QFDataArray):
        normalized_result = normalized_result.assign_coords(
            tickers=[tickers_mapping[t] for t in normalized_result.tickers.values])
    elif isinstance(normalized_result, PricesDataFrame):
        normalized_result = normalized_result.rename(columns=tickers_mapping)
    elif isinstance(normalized_result, PricesSeries):
        # Name of the PricesSeries can only contain strings
        ticker = tickers[0]
        normalized_result = normalized_result.rename(ticker.ticker)

    return normalized_result
def _mock_get_price(self, tickers, fields, start_date, end_date, frequency):
    """
    Test double for get_price: serves a fixed in-memory OHLCV bundle for
    self.ticker_1 / self.ticker_2 over 2021-05-01 .. 2021-05-06 and returns the
    normalized slice requested by the caller.

    NOTE(review): the "intraday" array duplicates the daily values and is also
    indexed with daily dates - presumably sufficient for these tests; confirm.
    """
    tickers, got_single_ticker = convert_to_list(tickers, Ticker)
    fields, got_single_field = convert_to_list(fields, PriceField)
    mock_daily_data = QFDataArray.create(
        dates=date_range(start='2021-05-01', end='2021-05-06', freq='D'),
        tickers=[self.ticker_1, self.ticker_2],
        fields=PriceField.ohlcv(),
        data=[
            # 2021-05-01
            [
                # Open High Low Close Volume
                [25.0, 25.1, 25.2, 26.0, 25.3],  # TICKER 1
                [27.0, 27.1, 27.2, 28.0, 27.3]   # TICKER 2
            ],
            # 2021-05-02 (ticker 1 has no data on this day)
            [
                # Open High Low Close Volume
                [None, None, None, None, None],  # TICKER 1
                [29.0, 29.1, 29.2, 30.0, 29.3]   # TICKER 2
            ],
            # 2021-05-03 (ticker 2 has no data on this day)
            [
                # Open High Low Close Volume
                [31.0, 31.1, 31.2, 32.0, 31.3],  # TICKER 1
                [None, None, None, None, None]   # TICKER 2
            ],
            # 2021-05-04 (only the Open of ticker 1 is available)
            [
                # Open High Low Close Volume
                [31.0, None, None, None, None],  # TICKER 1
                [None, None, None, None, None]   # TICKER 2
            ],
            # 2021-05-05 (Close missing for both tickers)
            [
                # Open High Low Close Volume
                [25.0, 25.1, 25.2, None, 25.3],  # TICKER 1
                [27.0, 27.1, 27.2, None, 27.3]   # TICKER 2
            ],
            # 2021-05-06 (no data at all)
            [
                # Open High Low Close Volume
                [None, None, None, None, None],  # TICKER 1
                [None, None, None, None, None]   # TICKER 2
            ],
        ])
    mock_intraday_data = QFDataArray.create(
        dates=date_range(start='2021-05-01', end='2021-05-06', freq='D'),
        tickers=[self.ticker_1, self.ticker_2],
        fields=PriceField.ohlcv(),
        data=[
            # 2021-05-01
            [
                # Open High Low Close Volume
                [25.0, 25.1, 25.2, 26.0, 25.3],  # TICKER 1
                [27.0, 27.1, 27.2, 28.0, 27.3]   # TICKER 2
            ],
            # 2021-05-02
            [
                # Open High Low Close Volume
                [None, None, None, None, None],  # TICKER 1
                [29.0, 29.1, 29.2, 30.0, 29.3]   # TICKER 2
            ],
            # 2021-05-03
            [
                # Open High Low Close Volume
                [31.0, 31.1, 31.2, 32.0, 31.3],  # TICKER 1
                [None, None, None, None, None]   # TICKER 2
            ],
            # 2021-05-04
            [
                # Open High Low Close Volume
                [31.0, None, None, None, None],  # TICKER 1
                [None, None, None, None, None]   # TICKER 2
            ],
            # 2021-05-05
            [
                # Open High Low Close Volume
                [25.0, 25.1, 25.2, None, 25.3],  # TICKER 1
                [27.0, 27.1, 27.2, None, 27.3]   # TICKER 2
            ],
            # 2021-05-06
            [
                # Open High Low Close Volume
                [None, None, None, None, None],  # TICKER 1
                [None, None, None, None, None]   # TICKER 2
            ],
        ])
    # Serve the daily fixture for DAILY requests and the intraday one otherwise
    data = mock_daily_data.loc[start_date:end_date, tickers, fields] if frequency == Frequency.DAILY else \
        mock_intraday_data.loc[start_date:end_date, tickers, fields]
    # got_single_date is hard-coded to False; the final True enables prices types
    return normalize_data_array(data, tickers, fields, False, got_single_ticker, got_single_field, True)