def test_data_arrays_concat_on_tickers(self):
    # Concatenating along the tickers dimension two arrays which share neither dates nor tickers
    # is expected to give an array spanning all dates and both tickers, with NaN wherever a
    # (date, ticker) pair has no data and with the float64 dtype preserved.
    float64 = np.dtype("float64")
    nan = np.nan

    first_ticker = BloombergTicker("Example 1")
    second_ticker = BloombergTicker("Example 2")
    price_fields = [PriceField.Open, PriceField.Close]
    all_dates = date_range(start=str_to_date("2017-01-01"), periods=5, freq="D")

    # First three days: data only for the first ticker.
    first_array = QFDataArray.create(
        all_dates[:3], [first_ticker], price_fields,
        [[[4., 1.]], [[5., 2.]], [[6., 3.]]]
    )
    self.assertEqual(float64, first_array.dtype)

    # Remaining two days: data only for the second ticker.
    second_array = QFDataArray.create(
        all_dates[3:], [second_ticker], price_fields,
        [[[nan, 10.]], [[nan, 14.]]]
    )
    self.assertEqual(float64, second_array.dtype)

    # Expected values, laid out as [date][ticker][field].
    expected_values = [
        [[4., 1.], [nan, nan]],
        [[5., 2.], [nan, nan]],
        [[6., 3.], [nan, nan]],
        [[nan, nan], [nan, 10.]],
        [[nan, nan], [nan, 14.]],
    ]
    expected_array = QFDataArray.create(
        all_dates, [first_ticker, second_ticker], price_fields, expected_values
    )
    self.assertEqual(float64, expected_array.dtype)

    actual_array = QFDataArray.concat([first_array, second_array], dim=TICKERS)
    self.assertEqual(float64, actual_array.dtype)

    assert_equal(expected_array, actual_array)
def _get_data_for_multiple_tickers(self, tickers, fields, start_date, end_date, use_prices_types):
    """
    Fetches the data for tickers which may belong to different data providers and merges the
    partial results into a single, normalized result.

    Parameters
    ----------
    tickers
        a single ticker or a collection of tickers
    fields
        a single field or a collection of fields; PriceField values if use_prices_types is True,
        strings otherwise
    start_date
        passed on to the data provider as the beginning of the requested period
    end_date
        passed on to the data provider as the end of the requested period
    use_prices_types
        if True the data is downloaded with get_price(), otherwise with get_history()

    Returns
    -------
    the output of normalize_data_array() applied to the concatenated partial results
    """
    field_type = PriceField if use_prices_types else str

    def fetch(provider: DataProvider, provider_tickers):
        # `fields` is looked up when the call is made, i.e. after it has been converted
        # to a list below.
        if use_prices_types:
            return provider.get_price(provider_tickers, fields, start_date, end_date)
        return provider.get_history(provider_tickers, fields, start_date, end_date)

    tickers, got_single_ticker = convert_to_list(tickers, Ticker)
    fields, got_single_field = convert_to_list(fields, field_type)
    got_single_date = start_date is not None and (start_date == end_date)

    # One request per run of consecutive tickers of the same class (groupby is applied to the
    # tickers in the given order, without sorting). Providers returning None are skipped.
    partial_results = []
    for ticker_class, same_class_tickers in groupby(tickers, type):
        provider = self._identify_data_provider(ticker_class)
        provider_result = fetch(provider, list(same_class_tickers))
        if provider_result is None:
            continue
        partial_results.append(provider_result)

    merged = QFDataArray.concat(partial_results, dim=TICKERS)
    return normalize_data_array(
        merged, tickers, fields, got_single_date, got_single_ticker, got_single_field, use_prices_types
    )
def tickers_dict_to_data_array(tickers_data_dict: Dict[Ticker, QFDataFrame],
                               requested_tickers: Union[Ticker, Sequence[Ticker]],
                               requested_fields) -> QFDataArray:
    """
    Converts a dictionary mapping tickers to data frames into a single QFDataArray.

    Note: the index name of every data frame in the dictionary is set to DATES in place.

    Parameters
    ----------
    tickers_data_dict: Dict[Ticker, QFDataFrame]
        Ticker -> QFDataFrame[dates, fields]
    requested_tickers: Union[Ticker, Sequence[Ticker]]
        a single ticker or a sequence of tickers expected in the result
    requested_fields
        fields expected in the result

    Returns
    -------
    QFDataArray
        unnamed array built from the non-empty data frames; an array with no dates (but with the
        requested tickers and fields) if there is nothing to convert
    """
    requested_tickers, _ = convert_to_list(requested_tickers, Ticker)

    # nothing to convert at all - return an empty array
    if not tickers_data_dict:
        return QFDataArray.create(dates=[], tickers=requested_tickers, fields=requested_fields)

    # ticker -> DataArray[dates, fields]; keeps the order of tickers_data_dict
    arrays_by_ticker = {}
    for ticker, frame in tickers_data_dict.items():
        frame.index.name = DATES
        if frame.empty:
            # tickers without any data are skipped here; they are added back by the reindex below
            continue
        arrays_by_ticker[ticker] = (
            frame.to_xarray()
            .to_array(dim=FIELDS, name=ticker)
            .transpose(DATES, FIELDS)
        )

    # all the data frames were empty - return an empty array
    if not arrays_by_ticker:
        return QFDataArray.create(dates=[], tickers=requested_tickers, fields=requested_fields)

    tickers_index = pd.Index(list(arrays_by_ticker), name=TICKERS)
    result = QFDataArray.concat(list(arrays_by_ticker.values()), dim=tickers_index)

    # some of the requested tickers are missing - align the result with what was requested
    if len(arrays_by_ticker) < len(requested_tickers):
        result = result.reindex(tickers=requested_tickers, fields=requested_fields)

    # the concatenated array inherits its name from the first converted ticker, which is
    # misleading; the result should have no name
    result.name = None
    return result
def _aggregate_intraday_data(self, data_array, start_date: datetime, end_date: datetime,
                             tickers: Sequence[Ticker], fields, frequency: Frequency):
    """
    Aggregates an intraday data array, which may span over multiple days, and returns a new
    data array with the data sampled with the given frequency.

    The resampling offset (the `base` parameter of resample()) has to be chosen differently for
    the first day than for the following ones. For that reason the array is split into two
    parts - the first day and all the remaining dates - which are resampled separately and
    concatenated along the dates dimension afterwards.
    """

    def resample_part(part, base_minute):
        # Resamples one part of the data and aggregates every bucket with _aggregate_data_array.
        resampled = part.resample(
            dates=frequency.to_pandas_freq(), base=base_minute, label='left', skipna=True
        )
        return resampled.apply(lambda x: self._aggregate_data_array(x, tickers, fields))

    # presumably 23:59:59 on the day of start_date - depends on RelativeDelta semantics
    end_of_day = start_date + RelativeDelta(hour=23, minute=59, second=59)
    if end_of_day < end_date:
        first_part_end = end_of_day
    else:
        first_part_end = end_date

    # NOTE(review): if .loc label slicing is inclusive on both ends, a sample stamped exactly at
    # end_of_day would be present in both parts - confirm.
    first_day_part = data_array.loc[start_date:first_part_end, :, :]
    remaining_part = data_array.loc[end_of_day:end_date, :, :]

    if len(first_day_part) > 0:
        # the first day is aligned to the minute of its first available sample
        first_sample_time = pd.to_datetime(first_day_part[DATES].values[0])
        first_day_part = resample_part(first_day_part, first_sample_time.minute)

    if len(remaining_part) > 0:
        # the following days are aligned to the minute of the market open event
        remaining_part = resample_part(remaining_part, MarketOpenEvent.trigger_time().minute)

    return QFDataArray.concat([first_day_part, remaining_part], dim=DATES)