def _get_coefficients_and_current_regressors_tickers(self, regressors_tickers: List[Ticker], positions_tickers: List[Ticker],
                                                     positions_allocation: QFSeries, from_date: datetime,
                                                     to_date: datetime):
    """
    Compute the coefficients of the portfolio returns against the regressors returns.

    Close prices of all positions and regressors tickers are requested from the data provider
    for the from_date - to_date range, converted to simple returns and cleaned with
    DataCleaner.proxy_using_value(0). The portfolio returns are the dot product of the positions
    returns with the values of positions_allocation.

    Parameters
    ----------
    regressors_tickers
        tickers used as regressors
    positions_tickers
        tickers of the positions; the columns of the positions returns follow this order
    positions_allocation
        series whose values are multiplied with the positions returns
        (presumably one weight per element of positions_tickers, in the same order - to be confirmed)
    from_date
        first argument of the date range passed to the data provider
    to_date
        second argument of the date range passed to the data provider

    Returns
    -------
    tuple
        (result of self._get_coefficients(regressors_returns, portfolio_returns),
        columns of the regressors returns that were left after removing the all-NaN ones)
    """
    all_tickers = [*positions_tickers, *regressors_tickers]
    close_prices = self._data_provider.get_price(all_tickers, PriceField.Close, from_date, to_date)
    simple_returns = close_prices.to_simple_returns()

    # NOTE(review): 0.1 is presumably the missing-data threshold of the DataCleaner - confirm
    cleaned_returns = DataCleaner(simple_returns, 0.1).proxy_using_value(0)

    # Positions which are absent from the cleaned data get a column of zeros, so that the number
    # of columns stays equal to the length of positions_allocation
    positions_returns = cleaned_returns.reindex(columns=positions_tickers, fill_value=0)

    # Regressors which are absent from the cleaned data show up as all-NaN columns after
    # reindexing and are removed here
    reindexed_regressors = cleaned_returns.reindex(columns=regressors_tickers)
    regressors_returns = reindexed_regressors.dropna(axis=1, how='all')

    portfolio_returns = positions_returns.dot(positions_allocation.values)
    coefficients = self._get_coefficients(regressors_returns, portfolio_returns)
    return coefficients, regressors_returns.columns
Exemple #2
0
    def _preprocess_data(self, analysed_tms, regressors_df):
        """
        Prepares the analysed timeseries and the regressors before they are processed.

        The regressors are passed through DataCleaner.proxy_using_regression, with the analysed
        timeseries used as the benchmark. Afterwards both the regressors and the analysed
        timeseries are limited to their common dates. The lengths of the input and of
        the preprocessed timeseries, as well as the number of regressors left, are logged
        on the debug level.

        Returns
        -------
        tuple
            (regressors data frame, analysed timeseries), as returned by get_values_for_common_dates
        """
        input_length_msg = "Length of input timeseries: {:d} \n".format(len(analysed_tms))
        self.logger.debug(input_length_msg)

        cleaned_regressors_df = DataCleaner(regressors_df).proxy_using_regression(
            analysed_tms, columns_type=SimpleReturnsSeries)
        common_values = get_values_for_common_dates(cleaned_regressors_df, analysed_tms)
        common_regressors_df, common_analysed_tms = common_values

        output_length_msg = "Length of preprocessed timeseries: {:d}".format(common_analysed_tms.size)
        self.logger.debug(output_length_msg)
        regressors_number_msg = "Number of regressors: {:d}".format(common_regressors_df.shape[1])
        self.logger.debug(regressors_number_msg)

        return common_regressors_df, common_analysed_tms
Exemple #3
0
 def setUp(self):
     """Creates a fresh test data frame, test benchmark and a DataCleaner built on that data frame."""
     dataframe = self._create_test_dataframe()
     self.test_dataframe = dataframe
     self.test_benchmark = self._create_test_benchmark()
     # the cleaner operates on the very same data frame object which is stored on the test case
     self.data_cleaner = DataCleaner(dataframe)
Exemple #4
0
class TestDataCleaner(TestCase):
    """Tests of the value-based and the regression-based proxying of missing data done by DataCleaner."""

    def setUp(self):
        """Creates a fresh test data frame, test benchmark and a DataCleaner built on that data frame."""
        dataframe = self._create_test_dataframe()
        self.test_dataframe = dataframe
        self.test_benchmark = self._create_test_benchmark()
        self.data_cleaner = DataCleaner(dataframe)

    @classmethod
    def _create_test_dataframe(cls):
        """
        Builds a 6-row returns data frame (daily dates starting on 2015-01-01) with columns 'a'-'e'.

        Columns 'a', 'c' and 'd' contain one missing value each, column 'b' contains two of them
        and column 'e' is complete.
        """
        nan = np.nan
        rows = [
            [nan, 0.0, 0.0, 0.0, 0.0],
            [1.0, nan, 1.0, 1.0, 1.0],
            [2.0, nan, nan, 2.0, 2.0],
            [3.0, 3.0, 3.0, nan, 3.0],
            [4.0, 4.0, 4.0, 4.0, 4.0],
            [5.0, 5.0, 5.0, 5.0, 5.0],
        ]
        dates = pd.date_range(start='2015-01-01', periods=6)

        return SimpleReturnsDataFrame(data=rows, index=dates, columns=['a', 'b', 'c', 'd', 'e'])

    @classmethod
    def _create_test_benchmark(cls):
        """
        Builds a 6-element benchmark returns series with values 1.0 - 6.0.

        Its dates start on 2015-01-02, one day later than the dates of the test data frame,
        so there is no benchmark value for the first date of the data frame.
        """
        dates = pd.date_range(start='2015-01-02', periods=6)
        benchmark_values = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]

        return SimpleReturnsSeries(data=benchmark_values, index=dates, name='Test prices')

    def _create_expected_dataframe(self, rows):
        """Wraps the rows into a data frame with columns 'a', 'c', 'd', 'e' and the dates of the test data frame."""
        return SimpleReturnsDataFrame(data=rows,
                                      columns=['a', 'c', 'd', 'e'],
                                      index=self.test_dataframe.index.copy())

    def test_proxy_using_values(self):
        """
        With the threshold of 0.2 column 'b' (2 out of 6 values missing) is expected to be removed
        and all the missing values in the other columns are expected to be replaced with 0.0.
        """
        self.data_cleaner.threshold = 0.2
        expected_dataframe = self._create_expected_dataframe([
            [0.0, 0.0, 0.0, 0.0],
            [1.0, 1.0, 1.0, 1.0],
            [2.0, 0.0, 2.0, 2.0],
            [3.0, 3.0, 0.0, 3.0],
            [4.0, 4.0, 4.0, 4.0],
            [5.0, 5.0, 5.0, 5.0],
        ])

        actual_dataframe = self.data_cleaner.proxy_using_value(proxy_value=0.0)

        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_proxy_using_regression(self):
        """
        With the threshold of 0.2 column 'b' is expected to be removed and the gaps in columns 'c'
        and 'd' are expected to be filled with 2.0 and 3.0. The first value of column 'a' is expected
        to stay missing (presumably because the benchmark has no value for that date).
        """
        self.data_cleaner.threshold = 0.2
        expected_dataframe = self._create_expected_dataframe([
            [np.nan, 0.0, 0.0, 0.0],
            [1.0, 1.0, 1.0, 1.0],
            [2.0, 2.0, 2.0, 2.0],
            [3.0, 3.0, 3.0, 3.0],
            [4.0, 4.0, 4.0, 4.0],
            [5.0, 5.0, 5.0, 5.0],
        ])

        actual_dataframe = self.data_cleaner.proxy_using_regression(benchmark_tms=self.test_benchmark,
                                                                    columns_type=SimpleReturnsSeries)

        assert_dataframes_equal(expected_dataframe, actual_dataframe)