Esempio n. 1
0
 def check_output(self, expr, expected):
     result = expr._compute(
         [self.fake_raw_data[input_] for input_ in expr.inputs],
         self.mask.index,
         self.mask.columns,
         self.mask.values,
     )
     check_allclose(result, expected)
 def check_output(self, expr, expected):
     result = expr._compute(
         [self.fake_raw_data[input_] for input_ in expr.inputs],
         self.mask.index,
         self.mask.columns,
         self.mask.values,
     )
     check_allclose(result, expected)
Esempio n. 3
0
    def test_rsi(self, seed_value, expected):

        rsi = RSI()

        today = np.datetime64(1, "ns")
        assets = np.arange(3)
        out = np.empty((3, ), dtype=float)

        np.random.seed(seed_value)  # Seed so we get deterministic results.
        test_data = np.abs(np.random.randn(15, 3))

        out = np.empty((3, ), dtype=float)
        rsi.compute(today, assets, out, test_data)

        check_allclose(expected, out)
    def test_rsi(self, seed_value, expected):

        rsi = RSI()

        today = np.datetime64(1, 'ns')
        assets = np.arange(3)
        out = np.empty((3,), dtype=float)

        np.random.seed(seed_value)  # Seed so we get deterministic results.
        test_data = np.abs(np.random.randn(15, 3))

        out = np.empty((3,), dtype=float)
        rsi.compute(today, assets, out, test_data)

        check_allclose(expected, out)
Esempio n. 5
0
    def test_rsi(self, seed_value, expected):

        rsi = RSI()

        today = datetime64(1, 'ns')
        assets = arange(3)
        out = empty((3, ), dtype=float)

        seed(seed_value)  # Seed so we get deterministic results.
        test_data = abs(randn(15, 3))

        out = empty((3, ), dtype=float)
        rsi.compute(today, assets, out, test_data)

        check_allclose(expected, out)
    def test_returns(self, seed_value, window_length):

        returns = Returns(window_length=window_length)

        today = datetime64(1, 'ns')
        assets = arange(3)
        out = empty((3,), dtype=float)

        seed(seed_value)  # Seed so we get deterministic results.
        test_data = abs(randn(window_length, 3))

        # Calculate the expected returns
        expected = (test_data[-1] - test_data[0]) / test_data[0]

        out = empty((3,), dtype=float)
        returns.compute(today, assets, out, test_data)

        check_allclose(expected, out)
Esempio n. 7
0
    def test_returns(self, seed_value, window_length):

        returns = Returns(window_length=window_length)

        today = datetime64(1, 'ns')
        assets = arange(3)
        out = empty((3, ), dtype=float)

        seed(seed_value)  # Seed so we get deterministic results.
        test_data = abs(randn(window_length, 3))

        # Calculate the expected returns
        expected = (test_data[-1] - test_data[0]) / test_data[0]

        out = empty((3, ), dtype=float)
        returns.compute(today, assets, out, test_data)

        check_allclose(expected, out)
Esempio n. 8
0
    def test_normalizations_hand_computed(self):
        """
        Test the hand-computed example in factor.demean.
        """
        f = self.f
        m = Mask()
        c = C()
        str_c = C(dtype=categorical_dtype, missing_value=None)

        factor_data = array([[1.0, 2.0, 3.0, 4.0], [1.5, 2.5, 3.5, 1.0],
                             [2.0, 3.0, 4.0, 1.5], [2.5, 3.5, 1.0, 2.0]], )
        filter_data = array(
            [[False, True, True, True], [True, False, True, True],
             [True, True, False, True], [True, True, True, False]],
            dtype=bool,
        )
        classifier_data = array(
            [[1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2]],
            dtype=int64_dtype,
        )
        string_classifier_data = LabelArray(
            classifier_data.astype(str).astype(object),
            missing_value=None,
        )

        terms = {
            'vanilla': f.demean(),
            'masked': f.demean(mask=m),
            'grouped': f.demean(groupby=c),
            'grouped_str': f.demean(groupby=str_c),
            'grouped_masked': f.demean(mask=m, groupby=c),
            'grouped_masked_str': f.demean(mask=m, groupby=str_c),
        }
        expected = {
            'vanilla':
            array([[-1.500, -0.500, 0.500, 1.500],
                   [-0.625, 0.375, 1.375, -1.125],
                   [-0.625, 0.375, 1.375, -1.125],
                   [0.250, 1.250, -1.250, -0.250]], ),
            'masked':
            array(
                [[nan, -1.000, 0.000, 1.000], [-0.500, nan, 1.500, -1.000],
                 [-0.166, 0.833, nan, -0.666], [0.166, 1.166, -1.333, nan]], ),
            'grouped':
            array([[-0.500, 0.500, -0.500, 0.500],
                   [-0.500, 0.500, 1.250, -1.250],
                   [-0.500, 0.500, 1.250, -1.250],
                   [-0.500, 0.500, -0.500, 0.500]], ),
            'grouped_masked':
            array([[nan, 0.000, -0.500, 0.500], [0.000, nan, 1.250, -1.250],
                   [-0.500, 0.500, nan, 0.000], [-0.500, 0.500, 0.000, nan]])
        }
        # Changing the classifier dtype shouldn't affect anything.
        expected['grouped_str'] = expected['grouped']
        expected['grouped_masked_str'] = expected['grouped_masked']

        graph = TermGraph(terms)
        results = self.run_graph(
            graph,
            initial_workspace={
                f: factor_data,
                c: classifier_data,
                str_c: string_classifier_data,
                m: filter_data,
            },
            mask=self.build_mask(self.ones_mask(shape=factor_data.shape)),
        )

        for key, (res, exp) in dzip_exact(results, expected).items():
            check_allclose(
                res,
                exp,
                # The hand-computed values aren't very precise (in particular,
                # we truncate repeating decimals at 3 places) This is just
                # asserting that the example isn't misleading by being totally
                # wrong.
                atol=0.001,
                err_msg="Mismatch for %r" % key)
Esempio n. 9
0
    def test_normalizations_hand_computed(self):
        """
        Test the hand-computed example in factor.demean.
        """
        f = self.f
        m = Mask()
        c = C()
        str_c = C(dtype=categorical_dtype, missing_value=None)

        factor_data = array(
            [[1.0, 2.0, 3.0, 4.0],
             [1.5, 2.5, 3.5, 1.0],
             [2.0, 3.0, 4.0, 1.5],
             [2.5, 3.5, 1.0, 2.0]],
        )
        filter_data = array(
            [[False, True, True, True],
             [True, False, True, True],
             [True, True, False, True],
             [True, True, True, False]],
            dtype=bool,
        )
        classifier_data = array(
            [[1, 1, 2, 2],
             [1, 1, 2, 2],
             [1, 1, 2, 2],
             [1, 1, 2, 2]],
            dtype=int64_dtype,
        )
        string_classifier_data = LabelArray(
            classifier_data.astype(str).astype(object),
            missing_value=None,
        )

        terms = {
            'vanilla': f.demean(),
            'masked': f.demean(mask=m),
            'grouped': f.demean(groupby=c),
            'grouped_str': f.demean(groupby=str_c),
            'grouped_masked': f.demean(mask=m, groupby=c),
            'grouped_masked_str': f.demean(mask=m, groupby=str_c),
        }
        expected = {
            'vanilla': array(
                [[-1.500, -0.500,  0.500,  1.500],
                 [-0.625,  0.375,  1.375, -1.125],
                 [-0.625,  0.375,  1.375, -1.125],
                 [0.250,   1.250, -1.250, -0.250]],
            ),
            'masked': array(
                [[nan,    -1.000,  0.000,  1.000],
                 [-0.500,    nan,  1.500, -1.000],
                 [-0.166,  0.833,    nan, -0.666],
                 [0.166,   1.166, -1.333,    nan]],
            ),
            'grouped': array(
                [[-0.500, 0.500, -0.500,  0.500],
                 [-0.500, 0.500,  1.250, -1.250],
                 [-0.500, 0.500,  1.250, -1.250],
                 [-0.500, 0.500, -0.500,  0.500]],
            ),
            'grouped_masked': array(
                [[nan,     0.000, -0.500,  0.500],
                 [0.000,     nan,  1.250, -1.250],
                 [-0.500,  0.500,    nan,  0.000],
                 [-0.500,  0.500,  0.000,    nan]]
            )
        }
        # Changing the classifier dtype shouldn't affect anything.
        expected['grouped_str'] = expected['grouped']
        expected['grouped_masked_str'] = expected['grouped_masked']

        graph = TermGraph(terms)
        results = self.run_graph(
            graph,
            initial_workspace={
                f: factor_data,
                c: classifier_data,
                str_c: string_classifier_data,
                m: filter_data,
            },
            mask=self.build_mask(self.ones_mask(shape=factor_data.shape)),
        )

        for key, (res, exp) in dzip_exact(results, expected).items():
            check_allclose(
                res,
                exp,
                # The hand-computed values aren't very precise (in particular,
                # we truncate repeating decimals at 3 places) This is just
                # asserting that the example isn't misleading by being totally
                # wrong.
                atol=0.001,
                err_msg="Mismatch for %r" % key
            )