def test_calc_series_stats(series, norm, expected):
    """Check ``calc_series_stats`` against the expected statistics.

    The result must be a dict with exactly the keys of ``expected`` and
    numerically close values. When ``norm`` is falsy the statistics are
    recomputed on a shuffled copy of ``series`` and compared with the same
    ``expected`` values, i.e. the test asserts that they do not depend on
    the order of the elements.

    Args:
        series: values passed to ``calc_series_stats``.
        norm: forwarded as the ``norm`` keyword argument.
        expected: mapping from statistic name to its expected value.
    """
    result = calc_series_stats(series, norm=norm)
    assert isinstance(result, dict)
    assert len(result) == len(expected)
    assert result.keys() == expected.keys()
    for key in result:
        assert np.isclose(result[key], expected[key])

    if not norm:
        # Shuffle a copy with a private, seeded generator: the input object
        # handed in by the test runner stays untouched and the global NumPy
        # random state is not reseeded as a side effect of this test.
        shuffled = list(series)
        np.random.RandomState(0).shuffle(shuffled)
        result = calc_series_stats(shuffled, norm=norm)
        for key in result:
            assert np.isclose(result[key], expected[key])
    def test_calculate(self, data_loader, tickers, columns, agg_day_counts,
                       max_back_quarter):
        """Check shape, index and value ordering of the calculated features.

        Builds ``DailyAggQuarterFeatures`` from the given parameters, runs
        ``calculate`` and verifies that:

        * the result is a DataFrame indexed by ``ticker`` and ``date``;
        * the row count does not exceed ``max_back_quarter * len(tickers)``;
        * there is one column per (statistic, column, day count) combination;
        * for every column and day count ``min <= mean <= max`` and
          ``min <= median <= max`` hold in every row.
        """
        fc = DailyAggQuarterFeatures(columns=columns,
                                     agg_day_counts=agg_day_counts,
                                     max_back_quarter=max_back_quarter)

        X = fc.calculate(data_loader, tickers)

        assert isinstance(X, pd.DataFrame)
        assert 'ticker' in X.index.names
        assert 'date' in X.index.names

        assert X.shape[0] <= max_back_quarter * len(tickers)
        # The number of statistics is taken from calc_series_stats itself so
        # that the expectation follows the set of keys it returns.
        stats_count = len(calc_series_stats([]))
        assert X.shape[1] == stats_count * len(columns) * len(agg_day_counts)

        for col in columns:
            for count in agg_day_counts:
                prefix = 'days{}_{}'.format(count, col)
                lowest = X[prefix + '_min']
                highest = X[prefix + '_max']
                mean = X[prefix + '_mean']
                median = X[prefix + '_median']
                # Every row has to satisfy the ordering, hence ``.all()``.
                assert (highest >= lowest).all()
                assert (highest >= mean).all()
                assert (highest >= median).all()
                assert (mean >= lowest).all()
                assert (median >= lowest).all()
Esempio n. 3
0
    def test_calculate(self, tickers, columns, quarter_counts,
                       max_back_quarter):
        """Check ``QuarterlyFeatures.calculate`` for two data loaders.

        The features are calculated once with ``Data(columns)`` and once with
        ``SF1Data(config['sf1_data_path'])``. For each result the test
        verifies that:

        * it is a DataFrame indexed by ``ticker`` and ``date``;
        * the row count equals ``max_back_quarter * len(tickers)`` for the
          ``Data`` loader and does not exceed it for the other loader;
        * the column count matches the expected number of features;
        * with a larger quarter count the ``min`` feature never increases
          and the ``max`` feature never decreases;
        * all ``_std`` columns are non-negative;
        * ``min <= mean <= max`` and ``min <= median <= max`` hold in every
          row for every column and quarter count.
        """
        fc = QuarterlyFeatures(columns=columns,
                               quarter_counts=quarter_counts,
                               max_back_quarter=max_back_quarter)

        loaders = [Data(columns), SF1Data(config['sf1_data_path'])]

        # Loader-independent expectations are computed once up front.
        # NOTE(review): the factor 2 comes from the original expectation;
        # presumably two feature families are produced per statistic.
        # Confirm against QuarterlyFeatures.
        stats_count = len(calc_series_stats([]))
        expected_width = 2 * stats_count * len(columns) * len(quarter_counts)
        sorted_quarter_counts = np.sort(quarter_counts)
        # Neighbouring (smaller, larger) quarter counts in ascending order.
        count_pairs = list(zip(sorted_quarter_counts[:-1],
                               sorted_quarter_counts[1:]))

        for data_loader in loaders:
            X = fc.calculate(data_loader, tickers)

            assert isinstance(X, pd.DataFrame)
            assert 'ticker' in X.index.names
            assert 'date' in X.index.names

            # Exact type check kept on purpose: only the ``Data`` loader is
            # expected to yield the full number of rows.
            if type(data_loader) is Data:
                assert X.shape[0] == max_back_quarter * len(tickers)
            else:
                assert X.shape[0] <= max_back_quarter * len(tickers)

            assert X.shape[1] == expected_width

            for col in columns:
                for lower_count, higher_count in count_pairs:
                    # Minimum can not be lower with reduction of
                    # quarter_count
                    l_min = 'quarter{}_{}_min'.format(lower_count, col)
                    h_min = 'quarter{}_{}_min'.format(higher_count, col)
                    assert (X[h_min] <= X[l_min]).all()

                    # Maximum can not be higher with reduction of
                    # quarter_count
                    l_max = 'quarter{}_{}_max'.format(lower_count, col)
                    h_max = 'quarter{}_{}_max'.format(higher_count, col)
                    assert (X[h_max] >= X[l_max]).all()

            std_cols = [x for x in X.columns if '_std' in x]
            for col in std_cols:
                assert X[col].min() >= 0

            for col in columns:
                for count in quarter_counts:
                    prefix = 'quarter{}_{}'.format(count, col)
                    lowest = X[prefix + '_min']
                    highest = X[prefix + '_max']
                    mean = X[prefix + '_mean']
                    median = X[prefix + '_median']
                    # Every row has to satisfy the ordering, hence ``.all()``.
                    assert (highest >= lowest).all()
                    assert (highest >= mean).all()
                    assert (highest >= median).all()
                    assert (mean >= lowest).all()
                    assert (median >= lowest).all()
def test_calc_series_stats_nans():
    """Check how ``calc_series_stats`` treats missing values.

    ``np.nan`` and ``None`` entries at any position must give the same
    statistics as the series without them, and a series with no valid
    values at all must give NaN for every statistic.
    """
    series_with_gaps = (
        [np.nan, 10, 0, 1],
        [None, 10, 0, 1],
        [10, 0, np.nan, 1],
    )
    for gappy in series_with_gaps:
        assert calc_series_stats(gappy) == calc_series_stats([10, 0, 1])

    # Neither an empty series nor one made only of missing values carries
    # any data, so every statistic is expected to be NaN.
    for no_data in ([], [np.nan, None]):
        stats = calc_series_stats(no_data)
        assert all(np.isnan(stats[name]) for name in stats)