Beispiel #1
0
    def test_data_arrays_concat_on_tickers(self):
        """
        Checks QFDataArray.concat along the tickers dimension for two single-ticker arrays with disjoint dates.

        The expected array covers all five dates and both tickers, holds NaN wherever a ticker has no data
        for a date, and every array involved is expected to have the float64 dtype.
        """
        float64 = np.dtype("float64")
        nan = np.nan

        first_ticker = BloombergTicker("Example 1")
        second_ticker = BloombergTicker("Example 2")
        fields = [PriceField.Open, PriceField.Close]
        all_dates = date_range(start=str_to_date("2017-01-01"), periods=5, freq="D")

        # first ticker: values only for the first three dates
        first_array = QFDataArray.create(
            all_dates[:3], [first_ticker], fields, [[[4., 1.]], [[5., 2.]], [[6., 3.]]])
        self.assertEqual(float64, first_array.dtype)

        # second ticker: values only for the remaining two dates (Open is missing there)
        second_array = QFDataArray.create(
            all_dates[3:], [second_ticker], fields, [[[nan, 10.]], [[nan, 14.]]])
        self.assertEqual(float64, second_array.dtype)

        # one row per date, one [Open, Close] pair per ticker
        expected_values = [
            [[4., 1.], [nan, nan]],
            [[5., 2.], [nan, nan]],
            [[6., 3.], [nan, nan]],
            [[nan, nan], [nan, 10.]],
            [[nan, nan], [nan, 14.]],
        ]
        expected = QFDataArray.create(all_dates, [first_ticker, second_ticker], fields, expected_values)
        self.assertEqual(float64, expected.dtype)

        actual = QFDataArray.concat([first_array, second_array], dim=TICKERS)
        self.assertEqual(float64, actual.dtype)

        assert_equal(expected, actual)
Beispiel #2
0
    def _get_data_for_multiple_tickers(self, tickers, fields, start_date, end_date, use_prices_types):
        """
        Downloads data for the given tickers from the data provider matching each ticker's class and
        merges the partial results into one normalized structure.

        Parameters
        ----------
        tickers
            a single ticker or a collection of tickers (converted to a list of Ticker)
        fields
            a single field or a collection of fields; PriceField values when use_prices_types is set,
            strings otherwise
        start_date
            first date of the requested range, passed on to the data provider
        end_date
            last date of the requested range, passed on to the data provider
        use_prices_types
            if truthy, data is fetched with the provider's get_price(); otherwise with get_history()

        Returns
        -------
        the output of normalize_data_array() applied to the partial results concatenated along TICKERS
        """
        tickers, got_single_ticker = convert_to_list(tickers, Ticker)
        fields, got_single_field = convert_to_list(fields, PriceField if use_prices_types else str)
        got_single_date = start_date is not None and (start_date == end_date)

        partial_results = []

        # NOTE: groupby() only merges consecutive tickers of the same class, so a ticker list with interleaved
        # classes results in more than one request to the same data provider.
        for ticker_class, ticker_group in groupby(tickers, type):
            data_provider = self._identify_data_provider(ticker_class)
            tickers_of_class = list(ticker_group)

            if use_prices_types:
                chunk = data_provider.get_price(tickers_of_class, fields, start_date, end_date)
            else:
                chunk = data_provider.get_history(tickers_of_class, fields, start_date, end_date)

            # providers which return None contribute nothing to the final result
            if chunk is not None:
                partial_results.append(chunk)

        merged = QFDataArray.concat(partial_results, dim=TICKERS)
        return normalize_data_array(
            merged, tickers, fields, got_single_date, got_single_ticker, got_single_field, use_prices_types)
Beispiel #3
0
def tickers_dict_to_data_array(tickers_data_dict: Dict[Ticker, QFDataFrame],
                               requested_tickers: Union[Ticker, Sequence[Ticker]], requested_fields) -> QFDataArray:
    """
    Builds a QFDataArray out of a dictionary mapping tickers to data frames.

    Note: the index name of every data frame in the dictionary is set to DATES in place.

    Parameters
    ----------
    tickers_data_dict: Dict[Ticker, QFDataFrame]
        Ticker -> QFDataFrame[dates, fields]
    requested_tickers: Union[Ticker, Sequence[Ticker]]
        a single ticker or a sequence of tickers; used as the tickers of an empty result and as the target
        of the reindexing when some of them have no data
    requested_fields
        fields used for an empty result and as the target of the reindexing

    Returns
    -------
    QFDataArray
        an array without any dates if there is no data to convert, otherwise the per-ticker data
        concatenated along the tickers dimension
    """
    requested_tickers, _ = convert_to_list(requested_tickers, Ticker)

    if not tickers_data_dict:
        # nothing to convert at all
        return QFDataArray.create(dates=[], tickers=requested_tickers, fields=requested_fields)

    tickers_with_data = []
    arrays_with_data = []
    for ticker, frame in tickers_data_dict.items():
        frame.index.name = DATES
        if not frame.empty:
            # Dataset -> DataArray with a fields dimension, ordered as (dates, fields)
            ticker_array = frame.to_xarray().to_array(dim=FIELDS, name=ticker).transpose(DATES, FIELDS)
            tickers_with_data.append(ticker)
            arrays_with_data.append(ticker_array)
        # tickers with empty frames are skipped here; they are added back by the reindexing below

    if not arrays_with_data:
        # all the data frames were empty
        return QFDataArray.create(dates=[], tickers=requested_tickers, fields=requested_fields)

    result = QFDataArray.concat(arrays_with_data, dim=pd.Index(tickers_with_data, name=TICKERS))

    if len(tickers_with_data) < len(requested_tickers):
        result = result.reindex(tickers=requested_tickers, fields=requested_fields)

    # after the concatenation the array is named after the first ticker, which is incorrect;
    # the result should have no name
    result.name = None

    return result
    def _aggregate_intraday_data(self, data_array, start_date: datetime,
                                 end_date: datetime, tickers: Sequence[Ticker],
                                 fields, frequency: Frequency):
        """
        Resamples an intraday data array to the given frequency and returns the aggregated data array.

        The array is split into the part covering the first day (start_date up to 23:59:59 of that day or
        end_date, whichever is earlier) and the part covering the rest of the range, because the base parameter
        of resample() has to be chosen differently for the first day: there it is the minute of the first
        available timestamp, whereas for the remaining dates it is the minute of the market open trigger time.
        Non-empty parts are resampled and aggregated with self._aggregate_data_array; both parts are then
        concatenated along the dates dimension.
        """
        pandas_freq_of = frequency.to_pandas_freq

        def resample_and_aggregate(chunk, base_minute):
            # resample the chunk into left-labelled bins and aggregate each of them
            resampler = chunk.resample(dates=pandas_freq_of(), base=base_minute, label='left', skipna=True)
            return resampler.apply(lambda group: self._aggregate_data_array(group, tickers, fields))

        end_of_day = start_date + RelativeDelta(hour=23, minute=59, second=59)
        first_part_end = end_date if not (end_of_day < end_date) else end_of_day

        # Split the data into the first day and all the following dates
        first_day = data_array.loc[start_date:first_part_end, :, :]
        later_days = data_array.loc[end_of_day:end_date, :, :]

        if len(first_day) > 0:
            first_minute = pd.to_datetime(first_day[DATES].values[0]).minute
            first_day = resample_and_aggregate(first_day, first_minute)

        if len(later_days) > 0:
            # the trigger time is looked up only if there is anything to resample
            market_open_minute = MarketOpenEvent.trigger_time().minute
            later_days = resample_and_aggregate(later_days, market_open_minute)

        return QFDataArray.concat([first_day, later_days], dim=DATES)