def check_output(self, expr, expected):
    """
    Compute ``expr`` over the fake raw data and compare with ``expected``.

    The inputs handed to ``expr._compute`` are looked up in
    ``self.fake_raw_data`` (one entry per element of ``expr.inputs``),
    together with the index, columns and values of ``self.mask``.  The
    computed result is then passed to ``check_allclose`` along with
    ``expected``.
    """
    raw_inputs = [self.fake_raw_data[term] for term in expr.inputs]
    mask = self.mask
    computed = expr._compute(
        raw_inputs,
        mask.index,
        mask.columns,
        mask.values,
    )
    check_allclose(computed, expected)
def test_rsi(self, seed_value, expected):
    """
    Run ``RSI.compute`` on seeded random data and check the output.

    Parameters
    ----------
    seed_value
        Value passed to ``np.random.seed`` so the generated input window
        is reproducible.
    expected
        Values the filled output buffer is compared against via
        ``check_allclose``.
    """
    rsi = RSI()

    today = np.datetime64(1, "ns")
    assets = np.arange(3)

    np.random.seed(seed_value)  # Seed so we get deterministic results.
    # 15 rows by 3 columns of non-negative random values.
    test_data = np.abs(np.random.randn(15, 3))

    # Single output buffer (one slot per asset) for ``compute`` to fill.
    out = np.empty((3,), dtype=float)
    rsi.compute(today, assets, out, test_data)

    check_allclose(expected, out)
def test_rsi(self, seed_value, expected):
    """
    Check the values ``RSI.compute`` writes for a seeded random window.

    Parameters
    ----------
    seed_value
        Seed handed to ``np.random.seed`` before the input data is drawn.
    expected
        Reference values compared with the output buffer through
        ``check_allclose``.
    """
    rsi = RSI()

    today = np.datetime64(1, 'ns')
    assets = np.arange(3)

    # Seed so we get deterministic results.
    np.random.seed(seed_value)
    test_data = np.abs(np.random.randn(15, 3))

    # The buffer is allocated once; ``compute`` writes its results into it.
    out = np.empty((3,), dtype=float)
    rsi.compute(today, assets, out, test_data)

    check_allclose(expected, out)
def test_rsi(self, seed_value, expected):
    """
    Exercise ``RSI.compute`` on reproducible random input.

    Parameters
    ----------
    seed_value
        Value given to ``seed`` so that ``randn`` yields the same window
        on every run.
    expected
        Values that the output buffer must match according to
        ``check_allclose``.
    """
    rsi = RSI()

    today = datetime64(1, 'ns')
    assets = arange(3)

    seed(seed_value)  # Seed so we get deterministic results.
    test_data = abs(randn(15, 3))

    # One output slot per asset; allocated a single time before compute.
    out = empty((3,), dtype=float)
    rsi.compute(today, assets, out, test_data)

    check_allclose(expected, out)
def test_returns(self, seed_value, window_length):
    """
    Compare ``Returns.compute`` with a directly calculated return.

    Parameters
    ----------
    seed_value
        Value passed to ``seed`` so the random input window is
        reproducible.
    window_length
        Used both as the ``window_length`` of the ``Returns`` term and as
        the number of rows of generated input data.
    """
    returns = Returns(window_length=window_length)

    today = datetime64(1, 'ns')
    assets = arange(3)

    seed(seed_value)  # Seed so we get deterministic results.
    test_data = abs(randn(window_length, 3))

    # Calculate the expected returns: change from the first row to the
    # last row, relative to the first row.
    expected = (test_data[-1] - test_data[0]) / test_data[0]

    # Single output buffer for ``compute`` to fill in place.
    out = empty((3,), dtype=float)
    returns.compute(today, assets, out, test_data)

    check_allclose(expected, out)
def test_returns(self, seed_value, window_length):
    """
    Check ``Returns.compute`` against returns derived from the raw window.

    Parameters
    ----------
    seed_value
        Seed supplied to ``seed`` before the input window is drawn.
    window_length
        Window length of the ``Returns`` term under test; also the row
        count of the random input data.
    """
    returns = Returns(window_length=window_length)

    today = datetime64(1, 'ns')
    assets = arange(3)

    # Seed so we get deterministic results.
    seed(seed_value)
    test_data = abs(randn(window_length, 3))

    # Calculate the expected returns
    first_row, last_row = test_data[0], test_data[-1]
    expected = (last_row - first_row) / first_row

    # Allocate the output buffer once, just before it is filled.
    out = empty((3,), dtype=float)
    returns.compute(today, assets, out, test_data)

    check_allclose(expected, out)
def test_normalizations_hand_computed(self):
    """
    Check ``demean`` against the hand-computed example in factor.demean.
    """
    factor = self.f
    mask_term = Mask()
    int_classifier = C()
    str_classifier = C(dtype=categorical_dtype, missing_value=None)

    factor_values = array(
        [[1.0, 2.0, 3.0, 4.0],
         [1.5, 2.5, 3.5, 1.0],
         [2.0, 3.0, 4.0, 1.5],
         [2.5, 3.5, 1.0, 2.0]],
    )
    # Each row excludes exactly one column (the diagonal entry).
    mask_values = array(
        [[False, True, True, True],
         [True, False, True, True],
         [True, True, False, True],
         [True, True, True, False]],
        dtype=bool,
    )
    # The same two groups on every row: columns 0-1 and columns 2-3.
    int_labels = array([[1, 1, 2, 2]] * 4, dtype=int64_dtype)
    str_labels = LabelArray(
        int_labels.astype(str).astype(object),
        missing_value=None,
    )

    terms = {
        'vanilla': factor.demean(),
        'masked': factor.demean(mask=mask_term),
        'grouped': factor.demean(groupby=int_classifier),
        'grouped_str': factor.demean(groupby=str_classifier),
        'grouped_masked': factor.demean(
            mask=mask_term,
            groupby=int_classifier,
        ),
        'grouped_masked_str': factor.demean(
            mask=mask_term,
            groupby=str_classifier,
        ),
    }

    expected = {
        'vanilla': array(
            [[-1.500, -0.500, 0.500, 1.500],
             [-0.625, 0.375, 1.375, -1.125],
             [-0.625, 0.375, 1.375, -1.125],
             [0.250, 1.250, -1.250, -0.250]],
        ),
        'masked': array(
            [[nan, -1.000, 0.000, 1.000],
             [-0.500, nan, 1.500, -1.000],
             [-0.166, 0.833, nan, -0.666],
             [0.166, 1.166, -1.333, nan]],
        ),
        'grouped': array(
            [[-0.500, 0.500, -0.500, 0.500],
             [-0.500, 0.500, 1.250, -1.250],
             [-0.500, 0.500, 1.250, -1.250],
             [-0.500, 0.500, -0.500, 0.500]],
        ),
        'grouped_masked': array(
            [[nan, 0.000, -0.500, 0.500],
             [0.000, nan, 1.250, -1.250],
             [-0.500, 0.500, nan, 0.000],
             [-0.500, 0.500, 0.000, nan]],
        ),
    }
    # Changing the classifier dtype shouldn't affect anything.
    expected['grouped_str'] = expected['grouped']
    expected['grouped_masked_str'] = expected['grouped_masked']

    graph = TermGraph(terms)
    workspace = {
        factor: factor_values,
        int_classifier: int_labels,
        str_classifier: str_labels,
        mask_term: mask_values,
    }
    all_ones = self.ones_mask(shape=factor_values.shape)
    results = self.run_graph(
        graph,
        initial_workspace=workspace,
        mask=self.build_mask(all_ones),
    )

    # The hand-computed values aren't very precise (in particular, we
    # truncate repeating decimals at 3 places).  This is just asserting
    # that the example isn't misleading by being totally wrong.
    tolerance = 0.001
    for name, pair in dzip_exact(results, expected).items():
        actual, desired = pair
        check_allclose(
            actual,
            desired,
            atol=tolerance,
            err_msg="Mismatch for %r" % name,
        )
def test_normalizations_hand_computed(self):
    """
    Run the hand-computed example from factor.demean through the graph.
    """
    f = self.f
    m = Mask()
    c = C()
    str_c = C(dtype=categorical_dtype, missing_value=None)

    # ---- Inputs -------------------------------------------------------
    factor_rows = [
        [1.0, 2.0, 3.0, 4.0],
        [1.5, 2.5, 3.5, 1.0],
        [2.0, 3.0, 4.0, 1.5],
        [2.5, 3.5, 1.0, 2.0],
    ]
    filter_rows = [
        [False, True, True, True],
        [True, False, True, True],
        [True, True, False, True],
        [True, True, True, False],
    ]
    classifier_rows = [
        [1, 1, 2, 2],
        [1, 1, 2, 2],
        [1, 1, 2, 2],
        [1, 1, 2, 2],
    ]
    factor_data = array(factor_rows)
    filter_data = array(filter_rows, dtype=bool)
    classifier_data = array(classifier_rows, dtype=int64_dtype)
    # Same labels as ``classifier_data``, but as strings.
    labels_as_objects = classifier_data.astype(str).astype(object)
    string_classifier_data = LabelArray(labels_as_objects, missing_value=None)

    # ---- Terms under test ---------------------------------------------
    terms = {}
    terms['vanilla'] = f.demean()
    terms['masked'] = f.demean(mask=m)
    terms['grouped'] = f.demean(groupby=c)
    terms['grouped_str'] = f.demean(groupby=str_c)
    terms['grouped_masked'] = f.demean(mask=m, groupby=c)
    terms['grouped_masked_str'] = f.demean(mask=m, groupby=str_c)

    # ---- Hand-computed expectations -----------------------------------
    vanilla = array(
        [[-1.500, -0.500, 0.500, 1.500],
         [-0.625, 0.375, 1.375, -1.125],
         [-0.625, 0.375, 1.375, -1.125],
         [0.250, 1.250, -1.250, -0.250]],
    )
    masked = array(
        [[nan, -1.000, 0.000, 1.000],
         [-0.500, nan, 1.500, -1.000],
         [-0.166, 0.833, nan, -0.666],
         [0.166, 1.166, -1.333, nan]],
    )
    grouped = array(
        [[-0.500, 0.500, -0.500, 0.500],
         [-0.500, 0.500, 1.250, -1.250],
         [-0.500, 0.500, 1.250, -1.250],
         [-0.500, 0.500, -0.500, 0.500]],
    )
    grouped_masked = array(
        [[nan, 0.000, -0.500, 0.500],
         [0.000, nan, 1.250, -1.250],
         [-0.500, 0.500, nan, 0.000],
         [-0.500, 0.500, 0.000, nan]]
    )
    expected = {
        'vanilla': vanilla,
        'masked': masked,
        'grouped': grouped,
        'grouped_masked': grouped_masked,
        # Changing the classifier dtype shouldn't affect anything.
        'grouped_str': grouped,
        'grouped_masked_str': grouped_masked,
    }

    # ---- Execute and compare ------------------------------------------
    graph = TermGraph(terms)
    results = self.run_graph(
        graph,
        initial_workspace={
            f: factor_data,
            c: classifier_data,
            str_c: string_classifier_data,
            m: filter_data,
        },
        mask=self.build_mask(self.ones_mask(shape=factor_data.shape)),
    )

    zipped = dzip_exact(results, expected)
    for key in zipped:
        res, exp = zipped[key]
        # Loose tolerance: the hand-computed values truncate repeating
        # decimals at 3 places, so this only guards against the example
        # being misleading by being totally wrong.
        check_allclose(res, exp, atol=0.001, err_msg="Mismatch for %r" % key)