コード例 #1
0
def test_calc_series_stats(series, norm, expected):
    """Check ``calc_series_stats`` against precomputed reference values.

    Parameters
    ----------
    series
        Input sequence handed to ``calc_series_stats``.
    norm
        Value forwarded as the ``norm`` keyword argument.
    expected
        Mapping from statistic name to the reference value.

    When ``norm`` is falsy the same reference values must also be obtained
    for a shuffled copy of ``series``.
    """
    result = calc_series_stats(series, norm=norm)
    assert isinstance(result, dict)
    assert len(result) == len(expected)
    assert result.keys() == expected.keys()
    for key in result:
        assert np.isclose(result[key], expected[key])

    if not norm:
        # Shuffle a copy with a private, seeded generator: the parametrized
        # input is shared test data and must not be reordered in place, and
        # the global NumPy random state must not be reset as a side effect.
        shuffled = list(series)
        np.random.RandomState(0).shuffle(shuffled)
        result = calc_series_stats(shuffled, norm=norm)
        for key in result:
            assert np.isclose(result[key], expected[key])
コード例 #2
0
    def test_calculate(self, data_loader, tickers, columns,
                       agg_day_counts, max_back_quarter):
        """Check the frame produced by ``DailyAggQuarterFeatures.calculate``.

        Verifies the result type, the presence of the ``ticker`` and ``date``
        index levels, the expected row bound and column count, and that the
        min / mean / median / max columns of every aggregation are ordered
        consistently.
        """
        fc = DailyAggQuarterFeatures(columns=columns,
                                     agg_day_counts=agg_day_counts,
                                     max_back_quarter=max_back_quarter)

        X = fc.calculate(data_loader, tickers)

        assert isinstance(X, pd.DataFrame)
        assert 'ticker' in X.index.names
        assert 'date' in X.index.names

        # Row count is bounded by max_back_quarter rows for each ticker.
        assert X.shape[0] <= max_back_quarter * len(tickers)
        # One column per (statistic, source column, aggregation window);
        # the statistic count is taken from calc_series_stats itself.
        stats_count = len(calc_series_stats([]))
        assert X.shape[1] == stats_count * len(columns) * len(agg_day_counts)

        for col in columns:
            for count in agg_day_counts:
                prefix = 'days{}_{}'.format(count, col)
                min_col = prefix + '_min'
                max_col = prefix + '_max'
                mean_col = prefix + '_mean'
                median_col = prefix + '_median'
                # min <= mean <= max and min <= median <= max on every row.
                assert (X[max_col] >= X[min_col]).all()
                assert (X[max_col] >= X[mean_col]).all()
                assert (X[max_col] >= X[median_col]).all()
                assert (X[mean_col] >= X[min_col]).all()
                assert (X[median_col] >= X[min_col]).all()
コード例 #3
0
    def test_calculate_dayly_index(self, data, tickers, columns,
                                   agg_day_counts, max_back_quarter):
        """Check ``DailyAggQuarterFeatures.calculate`` with ``daily_index``.

        Verifies the result type, the presence of the ``ticker`` and ``date``
        index levels, the expected row bound and column count, and that the
        min / mean / median / max columns of every per-code aggregation are
        ordered consistently.
        """
        # Instead of real commodities to avoid extra dataloaders
        commodities_codes = ['AAPL', 'MSFT']
        fc = DailyAggQuarterFeatures(daily_data_key='daily',
                                     quarterly_data_key='quarterly',
                                     columns=columns,
                                     agg_day_counts=agg_day_counts,
                                     max_back_quarter=max_back_quarter,
                                     daily_index=commodities_codes)

        X = fc.calculate(data, tickers)

        assert isinstance(X, pd.DataFrame)
        assert 'ticker' in X.index.names
        assert 'date' in X.index.names

        # Row count is bounded by max_back_quarter rows for each ticker.
        assert X.shape[0] <= max_back_quarter * len(tickers)
        # One column per (statistic, source column, aggregation window,
        # index code); the statistic count comes from calc_series_stats.
        stats_count = len(calc_series_stats([]))
        assert X.shape[1] == (stats_count * len(columns)
                              * len(agg_day_counts) * len(commodities_codes))

        for code in commodities_codes:
            for col in columns:
                for count in agg_day_counts:
                    prefix = '{}_days{}_{}'.format(code, count, col)
                    min_col = prefix + '_min'
                    max_col = prefix + '_max'
                    mean_col = prefix + '_mean'
                    median_col = prefix + '_median'
                    # min <= mean <= max and min <= median <= max per row.
                    assert (X[max_col] >= X[min_col]).all()
                    assert (X[max_col] >= X[mean_col]).all()
                    assert (X[max_col] >= X[median_col]).all()
                    assert (X[mean_col] >= X[min_col]).all()
                    assert (X[median_col] >= X[min_col]).all()
コード例 #4
0
    def test_calculate(self, data, tickers, columns, quarter_counts,
                       max_back_quarter):
        """Check the frame produced by ``QuarterlyFeatures.calculate``.

        Verifies the result type, the ``ticker`` / ``date`` index levels, the
        row and column counts, monotonicity of min / max across growing
        quarter windows, non-negativity of the ``_std`` columns and the
        ordering of min / mean / median / max within each window.
        """
        fc = QuarterlyFeatures(data_key='quarterly',
                               columns=columns,
                               quarter_counts=quarter_counts,
                               max_back_quarter=max_back_quarter)

        X = fc.calculate(data, tickers)

        assert isinstance(X, pd.DataFrame)
        assert 'ticker' in X.index.names
        assert 'date' in X.index.names

        # Exact type match (not isinstance) so only GenQuarterlyData itself
        # gets the strict row count; anything else gets the upper bound.
        if type(data['quarterly']) is GenQuarterlyData:
            assert X.shape[0] == max_back_quarter * len(tickers)
        else:
            assert X.shape[0] <= max_back_quarter * len(tickers)

        # NOTE(review): the factor 2 presumably reflects two variants of
        # every statistic - confirm against QuarterlyFeatures.
        stats_count = len(calc_series_stats([]))
        assert X.shape[1] == 2 * stats_count * \
                             len(columns) * len(quarter_counts)

        # Compare each window with the next larger one: widening the window
        # can only lower (or keep) the minimum and raise (or keep) the
        # maximum.
        sorted_quarter_counts = np.sort(quarter_counts)
        adjacent_counts = list(zip(sorted_quarter_counts[:-1],
                                   sorted_quarter_counts[1:]))
        for col in columns:
            for lower_count, higher_count in adjacent_counts:
                l_min = 'quarter{}_{}_min'.format(lower_count, col)
                h_min = 'quarter{}_{}_min'.format(higher_count, col)
                assert (X[h_min] <= X[l_min]).all()

                l_max = 'quarter{}_{}_max'.format(lower_count, col)
                h_max = 'quarter{}_{}_max'.format(higher_count, col)
                assert (X[h_max] >= X[l_max]).all()

        # Every column whose name contains '_std' must be non-negative.
        std_cols = [x for x in X.columns if '_std' in x]
        for col in std_cols:
            assert X[col].min() >= 0

        for col in columns:
            for count in quarter_counts:
                prefix = 'quarter{}_{}'.format(count, col)
                min_col = prefix + '_min'
                max_col = prefix + '_max'
                mean_col = prefix + '_mean'
                median_col = prefix + '_median'
                # min <= mean <= max and min <= median <= max on every row.
                assert (X[max_col] >= X[min_col]).all()
                assert (X[max_col] >= X[mean_col]).all()
                assert (X[max_col] >= X[median_col]).all()
                assert (X[mean_col] >= X[min_col]).all()
                assert (X[median_col] >= X[min_col]).all()
コード例 #5
0
def test_calc_series_stats_nans():
    """Check how ``calc_series_stats`` treats missing values.

    Inputs containing ``np.nan`` or ``None`` must give the same result as
    the input without them, and inputs with no usable values (empty, or
    only missing values) must give NaN for every statistic.
    """
    samples_with_gaps = (
        [np.nan, 10, 0, 1],
        [None, 10, 0, 1],
        [10, 0, np.nan, 1],
    )
    for sample in samples_with_gaps:
        assert calc_series_stats(sample) == calc_series_stats([10, 0, 1])

    for degenerate in ([], [np.nan, None]):
        stats = calc_series_stats(degenerate)
        for value in stats.values():
            assert np.isnan(value)