def test_compute_container_hash__data_array(self):
    """Hashes of data arrays built from identical inputs must match, while a
    different ticker insertion order in the source dict yields a different hash."""
    ticker_1 = BloombergTicker("Example 1")
    ticker_2 = BloombergTicker("Example 2")

    prices_df_1 = QFDataFrame(data={
        PriceField.Close: [1, 2, 3],
        PriceField.Open: [4, 5, 6]
    })
    prices_df_2 = QFDataFrame(data={PriceField.Close: [5, 7, 8]})

    def build_data_array(tickers_to_frames):
        # Tickers and fields order in the result is fixed; only the dict
        # insertion order varies between the test cases.
        return tickers_dict_to_data_array(
            tickers_to_frames, [ticker_1, ticker_2],
            [PriceField.Open, PriceField.Close])

    data_array_1 = build_data_array({ticker_1: prices_df_1, ticker_2: prices_df_2})
    data_array_2 = build_data_array({ticker_1: prices_df_1, ticker_2: prices_df_2})
    data_array_3 = build_data_array({ticker_2: prices_df_2, ticker_1: prices_df_1})

    self.assertEqual(compute_container_hash(data_array_1),
                     compute_container_hash(data_array_2))
    self.assertNotEqual(compute_container_hash(data_array_1),
                        compute_container_hash(data_array_3))
def get_history(
        self, tickers: Union[CcyTicker, Sequence[CcyTicker]],
        fields: Union[None, str, Sequence[str]] = None,
        start_date: datetime = None, end_date: datetime = None, **kwargs) \
        -> Union[QFSeries, QFDataFrame, QFDataArray]:
    """Download historical data for the given tickers and normalize the result
    container so its dimensionality matches the request (single vs. multiple
    tickers / fields / dates)."""
    tickers, got_single_ticker = convert_to_list(tickers, CcyTicker)
    got_single_date = start_date is not None and (start_date == end_date)

    if fields is None:
        # All fields existing in the downloaded data will be kept.
        got_single_field = False
    else:
        fields, got_single_field = convert_to_list(fields, (PriceField, str))

    with Session() as session:
        tickers_data_dict = {
            ticker: self._get_single_ticker(ticker, fields, start_date, end_date, session)
            for ticker in tickers
        }

    if fields is None:
        fields = get_fields_from_tickers_data_dict(tickers_data_dict)

    result_data_array = tickers_dict_to_data_array(tickers_data_dict, tickers, fields)
    return normalize_data_array(result_data_array, tickers, fields,
                                got_single_date, got_single_ticker, got_single_field)
def get_history(self, filepath: str) -> QFDataArray:
    """
    Parse a downloaded hapi response file into historical data.

    Parameters
    ----------
    filepath: str
        The full filepath with downloaded response

    Returns
    -------
    QFDataArray
        QFDataArray with history data
    """
    fields, content = self._get_fields_and_data_content(
        filepath,
        column_names=["Ticker", "Error code", "Num flds", "Pricing Source", "Dates"])

    tickers = content["Ticker"].unique().tolist()
    content["Dates"] = to_datetime(content["Dates"], format="%Y%m%d")
    content = content.drop(columns=["Error code", "Num flds", "Pricing Source"])

    def single_ticker_frame(ticker_name: str):
        # Rows belonging to this ticker, indexed by date; rows with no date
        # are dropped before indexing.
        rows = content[content["Ticker"] == ticker_name].drop(columns="Ticker")
        return rows.dropna(subset=["Dates"]).set_index("Dates")

    tickers_dict = {ticker_name: single_ticker_frame(ticker_name)
                    for ticker_name in tickers}
    return tickers_dict_to_data_array(tickers_dict, tickers, fields)
def test_tickers_dict_to_data_array(self):
    """A field missing from one ticker's frame is filled with NaN and the
    resulting array is float64, laid out as dates x tickers x fields."""
    ticker_1 = BloombergTicker("Example 1")
    ticker_2 = BloombergTicker("Example 2")
    fields = [PriceField.Open, PriceField.Close]
    dates = self.index[:3]

    # ticker_2 provides no Open prices, hence the NaN column in its panel.
    expected_values = [[[4., 1.], [nan, 5.]],
                       [[5., 2.], [nan, 7.]],
                       [[6., 3.], [nan, 8.]]]

    frame_1 = QFDataFrame(data={PriceField.Close: [1., 2., 3.],
                                PriceField.Open: [4., 5., 6.]}, index=dates)
    frame_2 = QFDataFrame(data={PriceField.Close: [5., 7., 8.]}, index=dates)

    data_array = tickers_dict_to_data_array(
        {ticker_1: frame_1, ticker_2: frame_2}, [ticker_1, ticker_2], fields)

    self.assertEqual(dtype("float64"), data_array.dtype)
    expected_data_array = QFDataArray.create(dates, [ticker_1, ticker_2],
                                             fields, expected_values)
    assert_equal(data_array, expected_data_array)
def _get_price_and_contracts(self, path: str, tickers: Sequence[Ticker], fields: Sequence[PriceField],
                             start_date: datetime, end_date: datetime, freq: Frequency):
    """
    Load per-ticker CSV price files found under `path` and build price data.

    Parameters
    ----------
    path: str
        Root directory, searched recursively for files named `<ticker>.csv`.
    tickers: Sequence[Ticker]
        Tickers to load; FutureTicker instances are skipped.
    fields: Sequence[PriceField]
        Price fields to keep in the result.
    start_date, end_date: datetime
        Inclusive date range used to slice each loaded frame.
    freq: Frequency
        MIN_1 expects 'Date' + 'Time' columns; DAILY expects only 'Date'.

    Returns
    -------
    (QFDataArray, QFDataFrame)
        Prices data array (dates x tickers x fields) and a frame with the
        'Contract' column of each ticker (empty series if absent).
    """
    field_to_price_field_dict = {
        'Open': PriceField.Open,
        'High': PriceField.High,
        'Low': PriceField.Low,
        'Close': PriceField.Close,
        'LastPrice': PriceField.Close,
        'Date': 'dates',
        'Date_Time': 'dates'
    }

    # it is required to distinguish intraday and daily volume
    if freq == Frequency.MIN_1:
        field_to_price_field_dict['TradeVolume'] = PriceField.Volume  # for intraday
    elif freq == Frequency.DAILY:
        field_to_price_field_dict['Volume'] = PriceField.Volume  # for daily

    tickers_strings_to_tickers = {
        ticker.as_string(): ticker for ticker in tickers if not isinstance(ticker, FutureTicker)
    }

    tickers_paths = [list(Path(path).glob('**/{}.csv'.format(ticker_str)))
                     for ticker_str in tickers_strings_to_tickers.keys()]
    joined_tickers_paths = [item for sublist in tickers_paths for item in sublist]

    tickers_prices_dict = {}
    contracts_data = {}

    # Loop variable renamed from `path` so it no longer shadows the `path` parameter.
    for file_path in joined_tickers_paths:
        ticker_str = file_path.name.replace('.csv', '')
        ticker = tickers_strings_to_tickers[ticker_str]

        # It is important to save the Time and Date as strings, in order to correctly infer the date format
        df = QFDataFrame(pd.read_csv(file_path, dtype={"Time": str, "Date": str, "Date_Time": str}))

        if 'Time' in df and freq == Frequency.MIN_1:
            df.index = pd.to_datetime(df["Date"] + ' ' + df["Time"])
        elif 'Time' not in df and 'Date' in df and freq == Frequency.DAILY:
            df.index = pd.to_datetime(df['Date'])
        else:
            self.logger.info(f"Ticker {ticker} does not satisfy timing requirements. File path: {file_path}")
            continue

        contracts_data[ticker] = df['Contract'] if 'Contract' in df.columns else QFSeries()

        df = df.rename(columns=field_to_price_field_dict)
        df = df.loc[start_date:end_date, df.columns.isin(fields)]

        fields_diff = set(fields).difference(df.columns)
        if fields_diff:
            self.logger.info("Not all fields are available for {}. Difference: {}".format(ticker, fields_diff))

        tickers_prices_dict[ticker] = QFDataFrame(df)

    contracts_df = QFDataFrame(contracts_data)
    return tickers_dict_to_data_array(tickers_prices_dict, list(tickers_prices_dict.keys()), fields), contracts_df
def _setup_data_handler(self, volume_value: Optional[float]) -> DataHandler:
    """Build a DailyDataHandler backed by preset daily Volume data (constant
    `volume_value` for every day in Dec 2019 - Jan 2020) for self.ticker."""
    dates = pd.date_range(str_to_date("2019-12-01"), str_to_date("2020-01-30"), freq='D')
    volume_values = [volume_value] * len(dates)
    prices_data_frame = QFDataFrame(data={PriceField.Volume: volume_values}, index=dates)

    prices_data_array = tickers_dict_to_data_array(
        {self.ticker: prices_data_frame}, [self.ticker], [PriceField.Volume])

    start, end = dates[0], dates[-1]
    data_provider = PresetDataProvider(prices_data_array, start, end, Frequency.DAILY)
    return DailyDataHandler(data_provider, SettableTimer(end))
def _receive_historical_response(
        self, requested_tickers: Sequence[BloombergTicker],
        requested_fields: Sequence[str]):
    """
    Collect historical response events from the Bloomberg session and combine
    them into a data array (dates x tickers x fields).

    Events whose security name does not map back to a requested ticker are
    logged and skipped; Bloomberg-reported errors are logged per event.
    """
    # Local import: pandas is only needed here for frame concatenation.
    from pandas import concat

    ticker_str_to_ticker: Dict[str, BloombergTicker] = {
        t.as_string(): t for t in requested_tickers
    }

    response_events = get_response_events(self._session)
    tickers_data_dict = defaultdict(
        lambda: QFDataFrame(columns=requested_fields))

    for event in response_events:
        try:
            check_event_for_errors(event)
            security_data = extract_security_data(event)
            check_security_data_for_errors(security_data)

            field_data_array = security_data.getElement(FIELD_DATA)
            dates = [
                to_datetime(x.getElementAsDatetime(DATE))
                for x in field_data_array.values()
            ]
            dates_fields_values = QFDataFrame(np.nan, index=dates,
                                              columns=requested_fields)
            for field_name in requested_fields:
                dates_fields_values.loc[:, field_name] = [
                    self._get_float_or_nan(data_of_date_elem, field_name)
                    for data_of_date_elem in field_data_array.values()
                ]

            security_name = security_data.getElementAsString(SECURITY)
            try:
                ticker = ticker_str_to_ticker[security_name]
                # DataFrame.append was deprecated in pandas 1.4 and removed in
                # pandas 2.0 - concatenate explicitly instead.
                tickers_data_dict[ticker] = concat(
                    [tickers_data_dict[ticker], dates_fields_values])
            except KeyError:
                self.logger.warning(
                    f"Received data for a ticker which was not present in the request: "
                    f"{security_name}. The data for that ticker will be excluded from parsing."
                )
        except BloombergError as e:
            self.logger.error(e)

    return tickers_dict_to_data_array(tickers_data_dict,
                                      list(tickers_data_dict.keys()),
                                      requested_fields)
def _receive_historical_response(self, requested_tickers, requested_fields):
    """Parse historical response events from the Bloomberg session into a
    data array (dates x tickers x fields); per-security errors are logged
    and that security is skipped."""
    response_events = get_response_events(self._session)

    # mapping: ticker -> DataFrame[dates, fields]
    tickers_data_dict = dict()  # type: Dict[BloombergTicker, pd.DataFrame]

    for event in response_events:
        check_event_for_errors(event)
        security_data = extract_security_data(event)
        security_name = security_data.getElementAsString(SECURITY)
        ticker = BloombergTicker.from_string(security_name)

        try:
            check_security_data_for_errors(security_data)
            field_data_array = security_data.getElement(FIELD_DATA)
            field_data_list = [field_data_array.getValueAsElement(i)
                               for i in range(field_data_array.numValues())]

            dates = [pd.to_datetime(x.getElementAsDatetime(DATE))
                     for x in field_data_list]

            # Start from an all-NaN frame and fill it column by column.
            dates_fields_values = pd.DataFrame(
                np.full((len(dates), len(requested_fields)), np.nan),
                index=dates, columns=requested_fields)
            for field_name in requested_fields:
                dates_fields_values.loc[:, field_name] = [
                    self._get_float_or_nan(date_elem, field_name)
                    for date_elem in field_data_list
                ]

            tickers_data_dict[ticker] = dates_fields_values
        except BloombergError:
            self.logger.exception(
                "Error in the received historical response")

    return tickers_dict_to_data_array(tickers_data_dict, requested_tickers,
                                      requested_fields)
def _get_history(
        self, convert_to_prices_types: bool,
        tickers: Union[QuandlTicker, Sequence[QuandlTicker]],
        fields: Union[None, str, Sequence[str], PriceField, Sequence[PriceField]] = None,
        start_date: datetime = None, end_date: datetime = None) -> \
        Union[QFSeries, QFDataFrame, QFDataArray]:
    """
    Download historical data from Quandl, grouping tickers per database, and
    normalize the result container to match the request dimensionality.

    NOTE: Only use one Quandl Database at the time.
    Do not mix multiple databases in one query - this is the natural limitation coming from the fact that column
    names (fields) are different across databases.
    """
    tickers, got_single_ticker = convert_to_list(tickers, QuandlTicker)
    got_single_date = start_date is not None and (start_date == end_date)

    if fields is not None:
        fields, got_single_field = convert_to_list(fields, (PriceField, str))
    else:
        got_single_field = False  # all existing fields will be present in the result

    result_dict = {}
    # itertools.groupby only groups *consecutive* elements - sort by database
    # name first so each database is requested exactly once even if tickers
    # from different databases are interleaved. The final array below is
    # built from the original `tickers` order, so sorting here is safe.
    sorted_tickers = sorted(tickers, key=lambda t: t.database_name)
    for db_name, ticker_group in groupby(sorted_tickers, lambda t: t.database_name):
        ticker_group = list(ticker_group)
        partial_result_dict = self._get_result_for_single_database(
            convert_to_prices_types, ticker_group, fields, start_date, end_date)
        result_dict.update(partial_result_dict)

    if fields is None:
        fields = get_fields_from_tickers_data_dict(result_dict)

    result_data_array = tickers_dict_to_data_array(result_dict, tickers, fields)
    normalized_result = normalize_data_array(
        result_data_array, tickers, fields, got_single_date, got_single_ticker,
        got_single_field, use_prices_types=convert_to_prices_types)
    return normalized_result
def _get_data_for_backtest(self) -> QFDataArray:
    """
    Creates a QFDataArray containing OHLCV values for all tickers passes to Fast Alpha Models Tester.
    """
    print("\nLoading all price values of tickers:")
    self._timer.set_current_time(self._end_date)
    ohlcv_fields = PriceField.ohlcv()

    def load_prices(ticker):
        # Future tickers need a futures chain to stitch contracts together.
        if isinstance(ticker, FutureTicker):
            futures_chain = FuturesChain(ticker, self._data_handler)
            return futures_chain.get_price(ohlcv_fields, self._start_date,
                                           self._end_date, Frequency.DAILY)
        return self._data_handler.get_price(ticker, ohlcv_fields,
                                            self._start_date, self._end_date)

    tickers_dict = {ticker: load_prices(ticker)
                    for ticker in tqdm(self._tickers, file=sys.stdout)}
    return tickers_dict_to_data_array(tickers_dict, self._tickers, ohlcv_fields)
def get_price(tickers, fields, start_date, end_date, _): prices_bar = [5.0, 10.0, 1.0, 4.0, 50] # Open, High, Low, Close, Volume dates_index = pd.date_range(start_date, end_date, freq='B') tickers, got_single_ticker = convert_to_list(tickers, Ticker) fields, got_single_field = convert_to_list(fields, PriceField) got_single_date = len(dates_index) == 1 prices_df = pd.DataFrame( index=pd.Index(dates_index, name=TICKERS), columns=pd.Index(PriceField.ohlcv(), name=FIELDS), data=[prices_bar] * len(dates_index) ) data_array = tickers_dict_to_data_array({ ticker: prices_df for ticker in self.tickers }, self.tickers, PriceField.ohlcv()) return normalize_data_array(data_array.loc[start_date:end_date, tickers, fields], tickers, fields, got_single_date, got_single_ticker, got_single_field)
def _get_intraday_data(self, ref_data_service, tickers: Sequence[BloombergTicker], fields,
                       start_date, end_date, frequency):
    """ Sends requests for each ticker and combines the outputs together. """
    tickers_data_dict = dict()

    for ticker in tickers:
        # One IntradayBarRequest per ticker; responses are received right
        # after each request is sent.
        bar_request = ref_data_service.createRequest("IntradayBarRequest")
        set_ticker(bar_request, ticker.as_string())
        self._set_intraday_time_period(bar_request, start_date, end_date, frequency)
        self._session.sendRequest(bar_request)
        tickers_data_dict[ticker] = self._receive_intraday_response(ticker, fields)

    return tickers_dict_to_data_array(tickers_data_dict,
                                      list(tickers_data_dict.keys()), fields)