Ejemplo n.º 1
0
 def compute(self, today, assets, out, returns, returns_slice):
     # Make sure that our slice is the correct shape (i.e. has only
     # one column) and that it has the same values as the original
     # returns factor from which it is derived.
     assert returns_slice.shape == (self.window_length, 1)
     assert returns.shape == (self.window_length, len(sids))
     check_arrays(returns_slice[:, 0], returns[:, my_asset_column])
Ejemplo n.º 2
0
 def test_make_cascading_boolean_array(self):
     check_arrays(
         make_cascading_boolean_array((3, 3)),
         array(
             [[True,   True, False],
              [True,  False, False],
              [False, False, False]]
         ),
     )
     check_arrays(
         make_cascading_boolean_array((3, 3), first_value=False),
         array(
             [[False, False, True],
              [False,  True, True],
              [True,   True, True]]
         ),
     )
     check_arrays(
         make_cascading_boolean_array((1, 3)),
         array([[True, True, False]]),
     )
     check_arrays(
         make_cascading_boolean_array((3, 1)),
         array([[False], [False], [False]]),
     )
     check_arrays(
         make_cascading_boolean_array((3, 0)),
         empty((3, 0), dtype=bool_dtype),
     )
Ejemplo n.º 3
0
    def test_window_safe(self, factor_len):
        # all true data set of (days, securities)
        data = full(self.default_shape, True, dtype=bool)

        class InputFilter(Filter):
            inputs = ()
            window_length = 0

        class TestFactor(CustomFactor):
            dtype = float64_dtype
            inputs = (InputFilter(), )
            window_length = factor_len

            def compute(self, today, assets, out, filter_):
                # sum for each column
                out[:] = np_sum(filter_, axis=0)

        results = self.run_graph(
            TermGraph({'windowsafe': TestFactor()}),
            initial_workspace={InputFilter(): data},
        )

        # number of days in default_shape
        n = self.default_shape[0]

        # shape of output array
        output_shape = ((n - factor_len + 1), self.default_shape[1])
        check_arrays(results['windowsafe'],
                     full(output_shape, factor_len, dtype=float64))
Ejemplo n.º 4
0
    def test_setitem_array(self):
        arr = LabelArray(self.strs, missing_value=None)
        orig_arr = arr.copy()

        # Write a row.
        self.assertFalse(
            (arr[0] == arr[1]).all(),
            "This test doesn't test anything because rows 0"
            " and 1 are already equal!"
        )
        arr[0] = arr[1]
        for i in range(arr.shape[1]):
            self.assertEqual(arr[0, i], arr[1, i])

        # Write a column.
        self.assertFalse(
            (arr[:, 0] == arr[:, 1]).all(),
            "This test doesn't test anything because columns 0"
            " and 1 are already equal!"
        )
        arr[:, 0] = arr[:, 1]
        for i in range(arr.shape[0]):
            self.assertEqual(arr[i, 0], arr[i, 1])

        # Write the whole array.
        arr[:] = orig_arr
        check_arrays(arr, orig_arr)
Ejemplo n.º 5
0
    def test_single_factor(self):
        loader = self.loader
        assets = self.assets
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )
        result_shape = (num_dates, num_assets) = (5, len(assets))
        dates = self.dates[10:10 + num_dates]

        factor = RollingSumDifference()
        expected_result = -factor.window_length

        # Since every asset will pass the screen, these should be equivalent.
        pipelines = [
            Pipeline(columns={'f': factor}),
            Pipeline(
                columns={'f': factor},
                screen=factor.eq(expected_result),
            ),
        ]

        for p in pipelines:
            result = engine.run_pipeline(p, dates[0], dates[-1])
            self.assertEqual(set(result.columns), {'f'})
            assert_multi_index_is_product(
                self, result.index, dates, assets
            )

            check_arrays(
                result['f'].unstack().values,
                full(result_shape, expected_result, dtype=float),
            )
Ejemplo n.º 6
0
    def test_percentile_nasty_partitions(self):
        # Test percentile with nasty partitions: divide up 5 assets into
        # quartiles.
        # There isn't a nice mathematical definition of correct behavior here,
        # so for now we guarantee the behavior of numpy.nanpercentile.  This is
        # mostly for regression testing in case we write our own specialized
        # percentile calculation at some point in the future.

        data = arange(25, dtype=float).reshape(5, 5) % 4
        quartiles = range(4)
        filter_names = ['pct_' + str(q) for q in quartiles]

        graph = TermGraph({
            name: self.f.percentile_between(q * 25.0, (q + 1) * 25.0)
            for name, q in zip(filter_names, quartiles)
        })
        results = self.run_graph(
            graph,
            initial_workspace={self.f: data},
            mask=self.build_mask(ones((5, 5))),
        )

        for name, quartile in zip(filter_names, quartiles):
            result = results[name]
            lower = quartile * 25.0
            upper = (quartile + 1) * 25.0
            expected = and_(
                nanpercentile(data, lower, axis=1, keepdims=True) <= data,
                data <= nanpercentile(data, upper, axis=1, keepdims=True),
            )
            check_arrays(result, expected)
Ejemplo n.º 7
0
 def test_make_cascading_boolean_array(self):
     check_arrays(
         make_cascading_boolean_array((3, 3)),
         array(
             [[True,   True, False],
              [True,  False, False],
              [False, False, False]]
         ),
     )
     check_arrays(
         make_cascading_boolean_array((3, 3), first_value=False),
         array(
             [[False, False, True],
              [False,  True, True],
              [True,   True, True]]
         ),
     )
     check_arrays(
         make_cascading_boolean_array((1, 3)),
         array([[True, True, False]]),
     )
     check_arrays(
         make_cascading_boolean_array((3, 1)),
         array([[False], [False], [False]]),
     )
     check_arrays(
         make_cascading_boolean_array((3, 0)),
         empty((3, 0), dtype=bool_dtype),
     )
Ejemplo n.º 8
0
    def test_isnull_datetime_dtype(self):
        class DatetimeFactor(Factor):
            dtype = datetime64ns_dtype
            window_length = 0
            inputs = ()

        factor = DatetimeFactor()

        data = arange(25).reshape(5, 5).astype('datetime64[ns]')
        data[eye(5, dtype=bool)] = NaTns

        graph = TermGraph(
            {
                'isnull': factor.isnull(),
                'notnull': factor.notnull(),
            }
        )

        results = self.run_graph(
            graph,
            initial_workspace={factor: data},
            mask=self.build_mask(ones((5, 5))),
        )
        check_arrays(results['isnull'], eye(5, dtype=bool))
        check_arrays(results['notnull'], ~eye(5, dtype=bool))
Ejemplo n.º 9
0
 def test_overwrite_adjustment_cases(self, name, data, lookback,
                                     adjustments, missing_value, expected):
     array = AdjustedArray(data, NOMASK, adjustments, missing_value)
     for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
         window_iter = array.traverse(lookback)
         for yielded, expected_yield in zip_longest(window_iter, expected):
             check_arrays(yielded, expected_yield)
Ejemplo n.º 10
0
    def test_percentile_nasty_partitions(self):
        # Test percentile with nasty partitions: divide up 5 assets into
        # quartiles.
        # There isn't a nice mathematical definition of correct behavior here,
        # so for now we guarantee the behavior of numpy.nanpercentile.  This is
        # mostly for regression testing in case we write our own specialized
        # percentile calculation at some point in the future.

        data = arange(25, dtype=float).reshape(5, 5) % 4
        quartiles = range(4)
        filter_names = ['pct_' + str(q) for q in quartiles]

        graph = TermGraph(
            {
                name: self.f.percentile_between(q * 25.0, (q + 1) * 25.0)
                for name, q in zip(filter_names, quartiles)
            }
        )
        results = self.run_graph(
            graph,
            initial_workspace={self.f: data},
            mask=self.build_mask(ones((5, 5))),
        )

        for name, quartile in zip(filter_names, quartiles):
            result = results[name]
            lower = quartile * 25.0
            upper = (quartile + 1) * 25.0
            expected = and_(
                nanpercentile(data, lower, axis=1, keepdims=True) <= data,
                data <= nanpercentile(data, upper, axis=1, keepdims=True),
            )
            check_arrays(result, expected)
Ejemplo n.º 11
0
 def compute(self, today, assets, out, returns, returns_slice):
     # Make sure that our slice is the correct shape (i.e. has only
     # one column) and that it has the same values as the original
     # returns factor from which it is derived.
     assert returns_slice.shape == (self.window_length, 1)
     assert returns.shape == (self.window_length, len(sids))
     check_arrays(returns_slice[:, 0], returns[:, my_asset_column])
Ejemplo n.º 12
0
    def test_setitem_array(self):
        arr = LabelArray(self.strs, missing_value=None)
        orig_arr = arr.copy()

        # Write a row.
        self.assertFalse(
            (arr[0] == arr[1]).all(),
            "This test doesn't test anything because rows 0"
            " and 1 are already equal!"
        )
        arr[0] = arr[1]
        for i in range(arr.shape[1]):
            self.assertEqual(arr[0, i], arr[1, i])

        # Write a column.
        self.assertFalse(
            (arr[:, 0] == arr[:, 1]).all(),
            "This test doesn't test anything because columns 0"
            " and 1 are already equal!"
        )
        arr[:, 0] = arr[:, 1]
        for i in range(arr.shape[0]):
            self.assertEqual(arr[i, 0], arr[i, 1])

        # Write the whole array.
        arr[:] = orig_arr
        check_arrays(arr, orig_arr)
Ejemplo n.º 13
0
    def test_input_dates_provided_by_default(self):
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )

        class TestFactor(CustomFactor):
            inputs = [InputDates(), USEquityPricing.close]
            window_length = 10
            dtype = datetime64ns_dtype

            def compute(self, today, assets, out, dates, closes):
                first, last = dates[[0, -1], 0]
                assert last == today.asm8
                assert len(dates) == len(closes) == self.window_length
                out[:] = first

        p = Pipeline(columns={'t': TestFactor()})
        results = engine.run_pipeline(p, self.dates[9], self.dates[10])

        # All results are the same, so just grab one column.
        column = results.unstack().iloc[:, 0].values
        check_arrays(column, self.dates[:2].values)
Ejemplo n.º 14
0
    def test_window_safe(self, factor_len):
        # all true data set of (days, securities)
        data = full(self.default_shape, True, dtype=bool)

        class InputFilter(Filter):
            inputs = ()
            window_length = 0

        class TestFactor(CustomFactor):
            dtype = float64_dtype
            inputs = (InputFilter(), )
            window_length = factor_len

            def compute(self, today, assets, out, filter_):
                # sum for each column
                out[:] = np_sum(filter_, axis=0)

        results = self.run_graph(
            TermGraph({'windowsafe': TestFactor()}),
            initial_workspace={InputFilter(): data},
        )

        # number of days in default_shape
        n = self.default_shape[0]

        # shape of output array
        output_shape = ((n - factor_len + 1), self.default_shape[1])
        check_arrays(
            results['windowsafe'],
            full(output_shape, factor_len, dtype=float64)
        )
Ejemplo n.º 15
0
    def test_masked_rankdata_2d(self, seed_value, method, use_mask,
                                set_missing, ascending):
        eyemask = ~eye(5, dtype=bool)
        nomask = ones((5, 5), dtype=bool)

        seed(seed_value)
        asfloat = (randn(5, 5) * seed_value)
        asdatetime = (asfloat).copy().view('datetime64[ns]')

        mask = eyemask if use_mask else nomask
        if set_missing:
            asfloat[:, 2] = nan
            asdatetime[:, 2] = NaTns

        float_result = masked_rankdata_2d(
            data=asfloat,
            mask=mask,
            missing_value=nan,
            method=method,
            ascending=True,
        )
        datetime_result = masked_rankdata_2d(
            data=asdatetime,
            mask=mask,
            missing_value=NaTns,
            method=method,
            ascending=True,
        )

        check_arrays(float_result, datetime_result)
Ejemplo n.º 16
0
    def test_masked_rankdata_2d(self,
                                seed_value,
                                method,
                                use_mask,
                                set_missing,
                                ascending):
        eyemask = ~eye(5, dtype=bool)
        nomask = ones((5, 5), dtype=bool)

        seed(seed_value)
        asfloat = (randn(5, 5) * seed_value)
        asdatetime = (asfloat).copy().view('datetime64[ns]')

        mask = eyemask if use_mask else nomask
        if set_missing:
            asfloat[:, 2] = nan
            asdatetime[:, 2] = NaTns

        float_result = masked_rankdata_2d(
            data=asfloat,
            mask=mask,
            missing_value=nan,
            method=method,
            ascending=True,
        )
        datetime_result = masked_rankdata_2d(
            data=asdatetime,
            mask=mask,
            missing_value=NaTns,
            method=method,
            ascending=True,
        )

        check_arrays(float_result, datetime_result)
Ejemplo n.º 17
0
    def test_rolling_and_nonrolling(self):
        open_ = USEquityPricing.open
        close = USEquityPricing.close
        volume = USEquityPricing.volume

        # Test for thirty days up to the last day that we think all
        # the assets existed.
        dates_to_test = self.dates[-30:]

        constants = {open_: 1, close: 2, volume: 3}
        loader = PrecomputedLoader(constants=constants, dates=self.dates, sids=self.asset_ids)
        engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

        sumdiff = RollingSumDifference()

        result = engine.run_pipeline(
            Pipeline(
                columns={"sumdiff": sumdiff, "open": open_.latest, "close": close.latest, "volume": volume.latest}
            ),
            dates_to_test[0],
            dates_to_test[-1],
        )
        self.assertIsNotNone(result)
        self.assertEqual({"sumdiff", "open", "close", "volume"}, set(result.columns))

        result_index = self.asset_ids * len(dates_to_test)
        result_shape = (len(result_index),)
        check_arrays(result["sumdiff"], Series(index=result_index, data=full(result_shape, -3, dtype=float)))

        for name, const in [("open", 1), ("close", 2), ("volume", 3)]:
            check_arrays(result[name], Series(index=result_index, data=full(result_shape, const, dtype=float)))
Ejemplo n.º 18
0
    def test_isnull_int_dtype(self, custom_missing_value):

        class CustomMissingValue(Factor):
            dtype = int64_dtype
            window_length = 0
            missing_value = custom_missing_value
            inputs = ()

        factor = CustomMissingValue()

        data = arange(25).reshape(5, 5)
        data[eye(5, dtype=bool)] = custom_missing_value

        graph = TermGraph(
            {
                'isnull': factor.isnull(),
                'notnull': factor.notnull(),
            }
        )

        results = self.run_graph(
            graph,
            initial_workspace={factor: data},
            mask=self.build_mask(ones((5, 5))),
        )
        check_arrays(results['isnull'], eye(5, dtype=bool))
        check_arrays(results['notnull'], ~eye(5, dtype=bool))
Ejemplo n.º 19
0
    def test_no_adjustments(self, name, data, lookback, adjustments,
                            missing_value, expected_output):

        array = AdjustedArray(data, NOMASK, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            in_out = zip(array.traverse(lookback), expected_output)
            for yielded, expected_yield in in_out:
                check_arrays(yielded, expected_yield)
Ejemplo n.º 20
0
 def check_output(self, expr, expected):
     result = expr._compute(
         [self.fake_raw_data[input_] for input_ in expr.inputs],
         self.mask.index,
         self.mask.columns,
         self.mask.values,
     )
     check_arrays(result, expected)
Ejemplo n.º 21
0
    def test_any(self):

        # FUN FACT: The inputs and outputs here are exactly the negation of
        # the inputs and outputs for test_all above. This isn't a coincidence.
        #
        # By de Morgan's Laws, we have::
        #
        #     ~(a & b) == (~a | ~b)
        #
        # negating both sides, we have::
        #
        #      (a & b) == ~(a | ~b)
        #
        # Since all(a, b) is isomorphic to (a & b), and any(a, b) is isomorphic
        # to (a | b), we have::
        #
        #     all(a, b) == ~(any(~a, ~b))
        #
        data = array([[0, 0, 0, 0, 0, 0],
                      [1, 0, 0, 0, 0, 0],
                      [0, 1, 0, 0, 0, 0],
                      [0, 0, 1, 0, 0, 0],
                      [0, 0, 0, 1, 0, 0],
                      [0, 0, 0, 0, 1, 0],
                      [0, 0, 0, 0, 0, 1]], dtype=bool)

        # With a window_length of N, 1's should be "sticky" for the (N - 1)
        # days after the 1 in the base data.

        # Note that, the way ``self.run_graph`` works, we compute the same
        # number of output rows for all inputs, so we only get the last 4
        # outputs for expected_3 even though we have enought input data to
        # compute 5 rows.
        expected_3 = array([[1, 1, 1, 0, 0, 0],
                            [0, 1, 1, 1, 0, 0],
                            [0, 0, 1, 1, 1, 0],
                            [0, 0, 0, 1, 1, 1]], dtype=bool)

        expected_4 = array([[1, 1, 1, 0, 0, 0],
                            [1, 1, 1, 1, 0, 0],
                            [0, 1, 1, 1, 1, 0],
                            [0, 0, 1, 1, 1, 1]], dtype=bool)

        class Input(Filter):
            inputs = ()
            window_length = 0

        results = self.run_graph(
            TermGraph({
                '3': Any(inputs=[Input()], window_length=3),
                '4': Any(inputs=[Input()], window_length=4),
            }),
            initial_workspace={Input(): data},
            mask=self.build_mask(ones(shape=data.shape)),
        )

        check_arrays(results['3'], expected_3)
        check_arrays(results['4'], expected_4)
Ejemplo n.º 22
0
 def check_terms(self, terms, expected, initial_workspace, mask):
     """
     Compile the given terms into a TermGraph, compute it with
     initial_workspace, and compare the results with ``expected``.
     """
     graph = TermGraph(terms)
     results = self.run_graph(graph, initial_workspace, mask)
     for key, (res, exp) in dzip_exact(results, expected).items():
         check_arrays(res, exp)
Ejemplo n.º 23
0
    def test_rolling_and_nonrolling(self):
        open_ = USEquityPricing.open
        close = USEquityPricing.close
        volume = USEquityPricing.volume

        # Test for thirty days up to the last day that we think all
        # the assets existed.
        dates_to_test = self.dates[-30:]

        constants = {open_: 1, close: 2, volume: 3}
        loader = PrecomputedLoader(
            constants=constants,
            dates=self.dates,
            sids=self.asset_ids,
        )
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )

        sumdiff = RollingSumDifference()

        result = engine.run_pipeline(
            Pipeline(
                columns={
                    'sumdiff': sumdiff,
                    'open': open_.latest,
                    'close': close.latest,
                    'volume': volume.latest,
                },
            ),
            dates_to_test[0],
            dates_to_test[-1]
        )
        self.assertIsNotNone(result)
        self.assertEqual(
            {'sumdiff', 'open', 'close', 'volume'},
            set(result.columns)
        )

        result_index = self.asset_ids * len(dates_to_test)
        result_shape = (len(result_index),)
        check_arrays(
            result['sumdiff'],
            Series(
                index=result_index,
                data=full(result_shape, -3, dtype=float),
            ),
        )

        for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
            check_arrays(
                result[name],
                Series(
                    index=result_index,
                    data=full(result_shape, const, dtype=float),
                ),
            )
Ejemplo n.º 24
0
 def check(terms):
     graph = TermGraph(terms)
     results = self.run_graph(
         graph,
         initial_workspace={f: data},
         mask=self.build_mask(ones((5, 5))),
     )
     for method in terms:
         check_arrays(results[method], expected_ranks[method])
Ejemplo n.º 25
0
 def check_terms(self, terms, expected, initial_workspace, mask):
     """
     Compile the given terms into a TermGraph, compute it with
     initial_workspace, and compare the results with ``expected``.
     """
     graph = TermGraph(terms)
     results = self.run_graph(graph, initial_workspace, mask)
     for key, (res, exp) in dzip_exact(results, expected).items():
         check_arrays(res, exp)
Ejemplo n.º 26
0
 def check(terms):
     graph = TermGraph(terms)
     results = self.run_graph(
         graph,
         initial_workspace={f: data},
         mask=self.build_mask(ones((5, 5))),
     )
     for method in terms:
         check_arrays(results[method], expected_ranks[method])
Ejemplo n.º 27
0
    def test_factor_with_multiple_outputs(self):
        dates = self.dates[5:10]
        assets = self.assets
        asset_ids = self.asset_ids
        constants = self.constants
        num_dates = len(dates)
        num_assets = len(assets)
        open = USEquityPricing.open
        close = USEquityPricing.close
        engine = SimplePipelineEngine(
            lambda column: self.loader, self.dates, self.asset_finder,
        )

        def create_expected_results(expected_value, mask):
            expected_values = where(mask, expected_value, nan)
            return DataFrame(expected_values, index=dates, columns=assets)

        cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
        expected_cascading_mask_result = make_cascading_boolean_array(
            shape=(num_dates, num_assets),
        )

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        expected_alternating_mask_result = make_alternating_boolean_array(
            shape=(num_dates, num_assets), first_value=False,
        )

        expected_no_mask_result = full(
            shape=(num_dates, num_assets), fill_value=True, dtype=bool_dtype,
        )

        masks = cascading_mask, alternating_mask, NotSpecified
        expected_mask_results = (
            expected_cascading_mask_result,
            expected_alternating_mask_result,
            expected_no_mask_result,
        )
        for mask, expected_mask in zip(masks, expected_mask_results):
            open_price, close_price = MultipleOutputs(mask=mask)
            pipeline = Pipeline(
                columns={'open_price': open_price, 'close_price': close_price},
            )
            if mask is not NotSpecified:
                pipeline.add(mask, 'mask')

            results = engine.run_pipeline(pipeline, dates[0], dates[-1])
            for colname, case_column in (('open_price', open),
                                         ('close_price', close)):
                if mask is not NotSpecified:
                    mask_results = results['mask'].unstack()
                    check_arrays(mask_results.values, expected_mask)
                output_results = results[colname].unstack()
                output_expected = create_expected_results(
                    constants[case_column], expected_mask,
                )
                assert_frame_equal(output_results, output_expected)
Ejemplo n.º 28
0
    def test_isfinite(self):
        data = self.randn_data(seed=10)
        data[:, 0] = nan
        data[:, 2] = inf
        data[:, 4] = -inf

        results = self.run_graph(
            TermGraph({'isfinite': self.f.isfinite()}),
            initial_workspace={self.f: data},
        )
        check_arrays(results['isfinite'], isfinite(data))
Ejemplo n.º 29
0
    def test_isfinite(self):
        data = self.randn_data(seed=10)
        data[:, 0] = nan
        data[:, 2] = inf
        data[:, 4] = -inf

        results = self.run_graph(
            TermGraph({'isfinite': self.f.isfinite()}),
            initial_workspace={self.f: data},
        )
        check_arrays(results['isfinite'], isfinite(data))
Ejemplo n.º 30
0
    def test_any(self):

        # FUN FACT: The inputs and outputs here are exactly the negation of
        # the inputs and outputs for test_all above. This isn't a coincidence.
        #
        # By de Morgan's Laws, we have::
        #
        #     ~(a & b) == (~a | ~b)
        #
        # negating both sides, we have::
        #
        #      (a & b) == ~(a | ~b)
        #
        # Since all(a, b) is isomorphic to (a & b), and any(a, b) is isomorphic
        # to (a | b), we have::
        #
        #     all(a, b) == ~(any(~a, ~b))
        #
        data = array(
            [[0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0],
             [0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 0],
             [0, 0, 0, 0, 0, 1]],
            dtype=bool)

        # With a window_length of N, 1's should be "sticky" for the (N - 1)
        # days after the 1 in the base data.

        # Note that, the way ``self.run_graph`` works, we compute the same
        # number of output rows for all inputs, so we only get the last 4
        # outputs for expected_3 even though we have enought input data to
        # compute 5 rows.
        expected_3 = array([[1, 1, 1, 0, 0, 0], [0, 1, 1, 1, 0, 0],
                            [0, 0, 1, 1, 1, 0], [0, 0, 0, 1, 1, 1]],
                           dtype=bool)

        expected_4 = array([[1, 1, 1, 0, 0, 0], [1, 1, 1, 1, 0, 0],
                            [0, 1, 1, 1, 1, 0], [0, 0, 1, 1, 1, 1]],
                           dtype=bool)

        class Input(Filter):
            inputs = ()
            window_length = 0

        results = self.run_graph(
            TermGraph({
                '3': Any(inputs=[Input()], window_length=3),
                '4': Any(inputs=[Input()], window_length=4),
            }),
            initial_workspace={Input(): data},
            mask=self.build_mask(ones(shape=data.shape)),
        )

        check_arrays(results['3'], expected_3)
        check_arrays(results['4'], expected_4)
Ejemplo n.º 31
0
    def test_top_and_bottom(self):
        data = self.randn_data(seed=5)  # Fix a seed for determinism.

        mask_data = ones_like(data, dtype=bool)
        mask_data[:, 0] = False

        nan_data = data.copy()
        nan_data[:, 0] = nan

        mask = Mask()
        workspace = {self.f: data, mask: mask_data}

        methods = ['top', 'bottom']
        counts = 2, 3, 10
        term_combos = list(product(methods, counts, [True, False]))

        def termname(method, count, masked):
            return '_'.join([method, str(count), 'mask' if masked else ''])

        # Add a term for each permutation of top/bottom, count, and
        # mask/no_mask.
        terms = {}
        for method, count, masked in term_combos:
            kwargs = {'N': count}
            if masked:
                kwargs['mask'] = mask
            term = getattr(self.f, method)(**kwargs)
            terms[termname(method, count, masked)] = term

        results = self.run_graph(TermGraph(terms), initial_workspace=workspace)

        def expected_result(method, count, masked):
            # Ranking with a mask is equivalent to ranking with nans applied on
            # the masked values.
            to_rank = nan_data if masked else data

            if method == 'top':
                return rowwise_rank(-to_rank) < count
            elif method == 'bottom':
                return rowwise_rank(to_rank) < count

        for method, count, masked in term_combos:
            result = results[termname(method, count, masked)]

            # Check that `min(c, num_assets)` assets passed each day.
            passed_per_day = result.sum(axis=1)
            check_arrays(
                passed_per_day,
                full_like(passed_per_day, min(count, data.shape[1])),
            )

            expected = expected_result(method, count, masked)
            check_arrays(result, expected)
Ejemplo n.º 32
0
    def test_top_and_bottom(self):
        data = self.randn_data(seed=5)  # Fix a seed for determinism.

        mask_data = ones_like(data, dtype=bool)
        mask_data[:, 0] = False

        nan_data = data.copy()
        nan_data[:, 0] = nan

        mask = Mask()
        workspace = {self.f: data, mask: mask_data}

        methods = ['top', 'bottom']
        counts = 2, 3, 10
        term_combos = list(product(methods, counts, [True, False]))

        def termname(method, count, masked):
            return '_'.join([method, str(count), 'mask' if masked else ''])

        # Add a term for each permutation of top/bottom, count, and
        # mask/no_mask.
        terms = {}
        for method, count, masked in term_combos:
            kwargs = {'N': count}
            if masked:
                kwargs['mask'] = mask
            term = getattr(self.f, method)(**kwargs)
            terms[termname(method, count, masked)] = term

        results = self.run_graph(TermGraph(terms), initial_workspace=workspace)

        def expected_result(method, count, masked):
            # Ranking with a mask is equivalent to ranking with nans applied on
            # the masked values.
            to_rank = nan_data if masked else data

            if method == 'top':
                return rowwise_rank(-to_rank) < count
            elif method == 'bottom':
                return rowwise_rank(to_rank) < count

        for method, count, masked in term_combos:
            result = results[termname(method, count, masked)]

            # Check that `min(c, num_assets)` assets passed each day.
            passed_per_day = result.sum(axis=1)
            check_arrays(
                passed_per_day,
                full_like(passed_per_day, min(count, data.shape[1])),
            )

            expected = expected_result(method, count, masked)
            check_arrays(result, expected)
Ejemplo n.º 33
0
    def test_overwrite_adjustment_cases(self, name, baseline, lookback,
                                        adjustments, missing_value,
                                        perspective_offset, expected):
        array = AdjustedArray(baseline, adjustments, missing_value)

        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)
Ejemplo n.º 34
0
 def test_overwrite_adjustment_cases(self,
                                     name,
                                     data,
                                     lookback,
                                     adjustments,
                                     missing_value,
                                     expected):
     array = AdjustedArray(data, NOMASK, adjustments, missing_value)
     for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
         window_iter = array.traverse(lookback)
         for yielded, expected_yield in zip_longest(window_iter, expected):
             check_arrays(yielded, expected_yield)
Ejemplo n.º 35
0
    def test_rank_after_mask(self, name, factor_dtype):

        f = F(dtype=factor_dtype)
        # data = arange(25).reshape(5, 5).transpose() % 4
        data = array([[0, 1, 2, 3, 0],
                      [1, 2, 3, 0, 1],
                      [2, 3, 0, 1, 2],
                      [3, 0, 1, 2, 3],
                      [0, 1, 2, 3, 0]], dtype=factor_dtype)
        mask_data = ~eye(5, dtype=bool)
        initial_workspace = {f: data, Mask(): mask_data}

        graph = TermGraph(
            {
                "ascending_nomask": f.rank(ascending=True),
                "ascending_mask": f.rank(ascending=True, mask=Mask()),
                "descending_nomask": f.rank(ascending=False),
                "descending_mask": f.rank(ascending=False, mask=Mask()),
            }
        )

        expected = {
            "ascending_nomask": array([[1., 3., 4., 5., 2.],
                                       [2., 4., 5., 1., 3.],
                                       [3., 5., 1., 2., 4.],
                                       [4., 1., 2., 3., 5.],
                                       [1., 3., 4., 5., 2.]]),
            "descending_nomask": array([[4., 3., 2., 1., 5.],
                                        [3., 2., 1., 5., 4.],
                                        [2., 1., 5., 4., 3.],
                                        [1., 5., 4., 3., 2.],
                                        [4., 3., 2., 1., 5.]]),
            # Diagonal should be all nans, and anything whose rank was less
            # than the diagonal in the unmasked calc should go down by 1.
            "ascending_mask": array([[nan, 2., 3., 4., 1.],
                                     [2., nan, 4., 1., 3.],
                                     [2., 4., nan, 1., 3.],
                                     [3., 1., 2., nan, 4.],
                                     [1., 2., 3., 4., nan]]),
            "descending_mask": array([[nan, 3., 2., 1., 4.],
                                      [2., nan, 1., 4., 3.],
                                      [2., 1., nan, 4., 3.],
                                      [1., 4., 3., nan, 2.],
                                      [4., 3., 2., 1., nan]]),
        }

        results = self.run_graph(
            graph,
            initial_workspace,
            mask=self.build_mask(ones((5, 5))),
        )
        for method in results:
            check_arrays(expected[method], results[method])
Ejemplo n.º 36
0
    def test_multiplicative_adjustments(self, name, data, lookback,
                                        adjustments, missing_value,
                                        perspective_offset, expected):

        array = AdjustedArray(data, NOMASK, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)
Ejemplo n.º 37
0
    def test_map_can_only_return_none_if_missing_value_is_none(self):
        # Should work.
        la = LabelArray(self.strs, missing_value=None)
        result = la.map(lambda x: None)

        check_arrays(
            result,
            LabelArray(np.full_like(self.strs, None), missing_value=None),
        )

        la = LabelArray(self.strs, missing_value="__MISSING__")
        with self.assertRaises(TypeError):
            la.map(lambda x: None)
Ejemplo n.º 38
0
    def test_no_adjustments(self,
                            name,
                            data,
                            lookback,
                            adjustments,
                            missing_value,
                            expected_output):

        array = AdjustedArray(data, NOMASK, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            in_out = zip(array.traverse(lookback), expected_output)
            for yielded, expected_yield in in_out:
                check_arrays(yielded, expected_yield)
Ejemplo n.º 39
0
    def test_normalize_to_query_time(self, expected, tz, dates):
        # Order matters in pandas 0.18.2. Prior to that, using tz_convert on
        # a DatetimeIndex with DST/EST timestamps mixed resulted in some of
        # them being an hour off (1 hour past midnight).
        for scrambler in self.combos:
            df = pd.DataFrame({"timestamp": dates[scrambler]})
            result = normalize_timestamp_to_query_time(df,
                                                       time(8, 45),
                                                       tz,
                                                       inplace=False,
                                                       ts_field='timestamp')

            timestamps = result['timestamp'].values
            check_arrays(np.sort(timestamps), np.sort(expected[scrambler]))
Ejemplo n.º 40
0
    def test_compare_to_str(self,
                            compval,
                            shape,
                            array_astype,
                            missing_value):

        strs = self.strs.reshape(shape).astype(array_astype)
        if missing_value is None:
            # As of numpy 1.9.2, object array != None returns just False
            # instead of an array, with a deprecation warning saying the
            # behavior will change in the future.  Work around that by just
            # using the ufunc.
            notmissing = np.not_equal(strs, missing_value)
        else:
            if not isinstance(missing_value, array_astype):
                missing_value = array_astype(missing_value, 'utf-8')
            notmissing = (strs != missing_value)

        arr = LabelArray(strs, missing_value=missing_value)

        if not isinstance(compval, array_astype):
            compval = array_astype(compval, 'utf-8')

        # arr.missing_value should behave like NaN.
        check_arrays(
            arr == compval,
            (strs == compval) & notmissing,
        )
        check_arrays(
            arr != compval,
            (strs != compval) & notmissing,
        )

        np_startswith = np.vectorize(lambda elem: elem.startswith(compval))
        check_arrays(
            arr.startswith(compval),
            np_startswith(strs) & notmissing,
        )

        np_endswith = np.vectorize(lambda elem: elem.endswith(compval))
        check_arrays(
            arr.endswith(compval),
            np_endswith(strs) & notmissing,
        )

        np_contains = np.vectorize(lambda elem: compval in elem)
        check_arrays(
            arr.has_substring(compval),
            np_contains(strs) & notmissing,
        )
Ejemplo n.º 41
0
    def test_normalize_to_query_time(self, expected, tz, dates):
        # Order matters in pandas 0.18.2. Prior to that, using tz_convert on
        # a DatetimeIndex with DST/EST timestamps mixed resulted in some of
        # them being an hour off (1 hour past midnight).
        for scrambler in self.combos:
            df = pd.DataFrame({"timestamp": dates[scrambler]})
            result = normalize_timestamp_to_query_time(df,
                                                       time(8, 45),
                                                       tz,
                                                       inplace=False,
                                                       ts_field='timestamp')

            timestamps = result['timestamp'].values
            check_arrays(np.sort(timestamps), np.sort(expected[scrambler]))
Ejemplo n.º 42
0
        def check(terms):
            graph = TermGraph(terms)
            results = self.run_graph(
                graph,
                initial_workspace={
                    f: data,
                    c: classifier_data,
                    str_c: string_classifier_data,
                },
                mask=self.build_mask(ones((5, 5))),
            )

            for method in terms:
                check_arrays(results[method], expected_grouped_ranks[method])
Ejemplo n.º 43
0
    def test_map_can_only_return_none_if_missing_value_is_none(self):

        # Should work.
        la = LabelArray(self.strs, missing_value=None)
        result = la.map(lambda x: None)

        check_arrays(
            result,
            LabelArray(np.full_like(self.strs, None), missing_value=None),
        )

        la = LabelArray(self.strs, missing_value="__MISSING__")
        with self.assertRaises(TypeError):
            la.map(lambda x: None)
Ejemplo n.º 44
0
    def test_compare_to_str(self,
                            compval,
                            shape,
                            array_astype,
                            missing_value):

        strs = self.strs.reshape(shape).astype(array_astype)
        if missing_value is None:
            # As of numpy 1.9.2, object array != None returns just False
            # instead of an array, with a deprecation warning saying the
            # behavior will change in the future.  Work around that by just
            # using the ufunc.
            notmissing = np.not_equal(strs, missing_value)
        else:
            if not isinstance(missing_value, array_astype):
                missing_value = array_astype(missing_value, 'utf-8')
            notmissing = (strs != missing_value)

        arr = LabelArray(strs, missing_value=missing_value)

        if not isinstance(compval, array_astype):
            compval = array_astype(compval, 'utf-8')

        # arr.missing_value should behave like NaN.
        check_arrays(
            arr == compval,
            (strs == compval) & notmissing,
        )
        check_arrays(
            arr != compval,
            (strs != compval) & notmissing,
        )

        np_startswith = np.vectorize(lambda elem: elem.startswith(compval))
        check_arrays(
            arr.startswith(compval),
            np_startswith(strs) & notmissing,
        )

        np_endswith = np.vectorize(lambda elem: elem.endswith(compval))
        check_arrays(
            arr.endswith(compval),
            np_endswith(strs) & notmissing,
        )

        np_contains = np.vectorize(lambda elem: compval in elem)
        check_arrays(
            arr.has_substring(compval),
            np_contains(strs) & notmissing,
        )
Ejemplo n.º 45
0
    def test_notnan(self):
        data = self.randn_data(seed=10)
        diag = eye(*data.shape, dtype=bool)
        data[diag] = nan

        results = self.run_graph(
            TermGraph({
                'notnan': self.f.notnan(),
                'notnull': self.f.notnull(),
            }),
            initial_workspace={self.f: data},
        )
        check_arrays(results['notnan'], ~diag)
        check_arrays(results['notnull'], ~diag)
Ejemplo n.º 46
0
        def check(terms):
            graph = TermGraph(terms)
            results = self.run_graph(
                graph,
                initial_workspace={
                    f: data,
                    c: classifier_data,
                    str_c: string_classifier_data,
                },
                mask=self.build_mask(ones((5, 5))),
            )

            for method in terms:
                check_arrays(results[method], expected_grouped_ranks[method])
Ejemplo n.º 47
0
    def test_object1darrayoverwrite(self):
        pairs = [u + l for u, l in product(ascii_uppercase, ascii_lowercase)]
        categories = pairs + ["~" + c for c in pairs]
        baseline = LabelArray(
            np.array([["".join((r, c)) for c in "abc"] for r in ascii_uppercase]),
            None,
            categories,
        )
        full_expected = baseline.copy()

        def flip(cs):
            if cs is None:
                return None
            if cs[0] != "~":
                return "~" + cs
            return cs

        def make_overwrite(fr, lr, fc, lc):
            fr, lr, fc, lc = map(ord, (fr, lr, fc, lc))
            fr -= ord("A")
            lr -= ord("A")
            fc -= ord("a")
            lc -= ord("a")

            return Object1DArrayOverwrite(
                fr,
                lr,
                fc,
                lc,
                baseline[fr : lr + 1, fc].map(flip),
            )

        overwrites = {
            3: [make_overwrite("A", "B", "a", "a")],
            4: [make_overwrite("A", "C", "b", "c")],
            5: [make_overwrite("D", "D", "a", "b")],
        }

        it = AdjustedArray(baseline, overwrites, None).traverse(3)

        window = next(it)
        expected = full_expected[:3]
        check_arrays(window, expected)

        window = next(it)
        full_expected[0:2, 0] = LabelArray(["~Aa", "~Ba"], None)
        expected = full_expected[1:4]
        check_arrays(window, expected)

        window = next(it)
        full_expected[0:3, 1:3] = LabelArray(
            [["~Ab", "~Ac"], ["~Bb", "~Bc"], ["~Cb", "~Cb"]], None
        )
        expected = full_expected[2:5]
        check_arrays(window, expected)

        window = next(it)
        full_expected[3, :2] = "~Da"
        expected = full_expected[3:6]
        check_arrays(window, expected)
Ejemplo n.º 48
0
    def test_notnan(self):
        data = self.randn_data(seed=10)
        diag = eye(*data.shape, dtype=bool)
        data[diag] = nan

        results = self.run_graph(
            TermGraph({
                'notnan': self.f.notnan(),
                'notnull': self.f.notnull(),
            }),
            initial_workspace={self.f: data},
        )
        check_arrays(results['notnan'], ~diag)
        check_arrays(results['notnull'], ~diag)
Ejemplo n.º 49
0
    def test_object1darrayoverwrite(self):
        pairs = [u + l for u, l in product(ascii_uppercase, ascii_lowercase)]
        categories = pairs + ['~' + c for c in pairs]
        baseline = LabelArray(
            array([[''.join((r, c)) for c in 'abc'] for r in ascii_uppercase]),
            None,
            categories,
        )
        full_expected = baseline.copy()

        def flip(cs):
            if cs is None:
                return None
            if cs[0] != '~':
                return '~' + cs
            return cs

        def make_overwrite(fr, lr, fc, lc):
            fr, lr, fc, lc = map(ord, (fr, lr, fc, lc))
            fr -= ord('A')
            lr -= ord('A')
            fc -= ord('a')
            lc -= ord('a')

            return Object1DArrayOverwrite(
                fr,
                lr,
                fc,
                lc,
                baseline[fr:lr + 1, fc].map(flip),
            )

        overwrites = {
            3: [make_overwrite('A', 'B', 'a', 'a')],
            4: [make_overwrite('A', 'C', 'b', 'c')],
            5: [make_overwrite('D', 'D', 'a', 'b')],
        }

        it = AdjustedArray(baseline, overwrites, None).traverse(3)

        window = next(it)
        expected = full_expected[:3]
        check_arrays(window, expected)

        window = next(it)
        full_expected[0:2, 0] = LabelArray(['~Aa', '~Ba'], None)
        expected = full_expected[1:4]
        check_arrays(window, expected)

        window = next(it)
        full_expected[0:3, 1:3] = LabelArray(
            [['~Ab', '~Ac'], ['~Bb', '~Bc'], ['~Cb', '~Cb']], None)
        expected = full_expected[2:5]
        check_arrays(window, expected)

        window = next(it)
        full_expected[3, :2] = '~Da'
        expected = full_expected[3:6]
        check_arrays(window, expected)
Ejemplo n.º 50
0
    def test_rank_after_mask(self, name, factor_dtype):

        f = F(dtype=factor_dtype)
        # data = arange(25).reshape(5, 5).transpose() % 4
        data = array([[0, 1, 2, 3, 0], [1, 2, 3, 0, 1], [2, 3, 0, 1, 2],
                      [3, 0, 1, 2, 3], [0, 1, 2, 3, 0]],
                     dtype=factor_dtype)
        mask_data = ~eye(5, dtype=bool)
        initial_workspace = {f: data, Mask(): mask_data}

        graph = TermGraph({
            "ascending_nomask":
            f.rank(ascending=True),
            "ascending_mask":
            f.rank(ascending=True, mask=Mask()),
            "descending_nomask":
            f.rank(ascending=False),
            "descending_mask":
            f.rank(ascending=False, mask=Mask()),
        })

        expected = {
            "ascending_nomask":
            array([[1., 3., 4., 5., 2.], [2., 4., 5., 1., 3.],
                   [3., 5., 1., 2., 4.], [4., 1., 2., 3., 5.],
                   [1., 3., 4., 5., 2.]]),
            "descending_nomask":
            array([[4., 3., 2., 1., 5.], [3., 2., 1., 5., 4.],
                   [2., 1., 5., 4., 3.], [1., 5., 4., 3., 2.],
                   [4., 3., 2., 1., 5.]]),
            # Diagonal should be all nans, and anything whose rank was less
            # than the diagonal in the unmasked calc should go down by 1.
            "ascending_mask":
            array([[nan, 2., 3., 4., 1.], [2., nan, 4., 1., 3.],
                   [2., 4., nan, 1., 3.], [3., 1., 2., nan, 4.],
                   [1., 2., 3., 4., nan]]),
            "descending_mask":
            array([[nan, 3., 2., 1., 4.], [2., nan, 1., 4., 3.],
                   [2., 1., nan, 4., 3.], [1., 4., 3., nan, 2.],
                   [4., 3., 2., 1., nan]]),
        }

        results = self.run_graph(
            graph,
            initial_workspace,
            mask=self.build_mask(ones((5, 5))),
        )
        for method in results:
            check_arrays(expected[method], results[method])
Ejemplo n.º 51
0
    def test_object1darrayoverwrite(self):
        pairs = [u + l for u, l in product(ascii_uppercase, ascii_lowercase)]
        categories = pairs + ['~' + c for c in pairs]
        baseline = LabelArray(
            array([[''.join((r, c)) for c in 'abc'] for r in ascii_uppercase]),
            None,
            categories,
        )
        full_expected = baseline.copy()

        def flip(cs):
            if cs is None:
                return None
            if cs[0] != '~':
                return '~' + cs
            return cs

        def make_overwrite(fr, lr, fc, lc):
            fr, lr, fc, lc = map(ord, (fr, lr, fc, lc))
            fr -= ord('A')
            lr -= ord('A')
            fc -= ord('a')
            lc -= ord('a')

            return Object1DArrayOverwrite(
                fr, lr,
                fc, lc,
                baseline[fr:lr + 1, fc].map(flip),
            )

        overwrites = {
            3: [make_overwrite('A', 'B', 'a', 'a')],
            4: [make_overwrite('A', 'C', 'b', 'c')],
            5: [make_overwrite('D', 'D', 'a', 'b')],
        }

        it = AdjustedArray(baseline, overwrites, None).traverse(3)

        window = next(it)
        expected = full_expected[:3]
        check_arrays(window, expected)

        window = next(it)
        full_expected[0:2, 0] = LabelArray(['~Aa', '~Ba'], None)
        expected = full_expected[1:4]
        check_arrays(window, expected)

        window = next(it)
        full_expected[0:3, 1:3] = LabelArray([['~Ab', '~Ac'],
                                              ['~Bb', '~Bc'],
                                              ['~Cb', '~Cb']], None)
        expected = full_expected[2:5]
        check_arrays(window, expected)

        window = next(it)
        full_expected[3, :2] = '~Da'
        expected = full_expected[3:6]
        check_arrays(window, expected)
    def test_overwrite_adjustment_cases(self,
                                        name,
                                        baseline,
                                        lookback,
                                        adjustments,
                                        missing_value,
                                        perspective_offset,
                                        expected):
        array = AdjustedArray(baseline, adjustments, missing_value)

        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)
Ejemplo n.º 53
0
    def test_multiplicative_adjustments(self,
                                        name,
                                        data,
                                        lookback,
                                        adjustments,
                                        missing_value,
                                        perspective_offset,
                                        expected):

        array = AdjustedArray(data, NOMASK, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)
Ejemplo n.º 54
0
    def test_masking(self, dtype, missing_value, window_length):
        missing_value = coerce_to_dtype(dtype, missing_value)
        baseline_ints = arange(15).reshape(5, 3)
        baseline = baseline_ints.astype(dtype)
        mask = (baseline_ints % 2).astype(bool)
        masked_baseline = where(mask, baseline, missing_value)

        array = AdjustedArray(
            baseline,
            mask,
            adjustments={},
            missing_value=missing_value,
        )

        gen_expected = moving_window(masked_baseline, window_length)
        gen_actual = array.traverse(window_length)
        for expected, actual in zip(gen_expected, gen_actual):
            check_arrays(expected, actual)
Ejemplo n.º 55
0
    def test_masking(self, dtype, missing_value, window_length):
        missing_value = coerce_to_dtype(dtype, missing_value)
        baseline_ints = arange(15).reshape(5, 3)
        baseline = baseline_ints.astype(dtype)
        mask = (baseline_ints % 2).astype(bool)
        masked_baseline = where(mask, baseline, missing_value)

        array = AdjustedArray(
            baseline,
            mask,
            adjustments={},
            missing_value=missing_value,
        )

        gen_expected = moving_window(masked_baseline, window_length)
        gen_actual = array.traverse(window_length)
        for expected, actual in zip(gen_expected, gen_actual):
            check_arrays(expected, actual)
Ejemplo n.º 56
0
    def test_all(self):

        data = array([[1, 1, 1, 1, 1, 1],
                      [0, 1, 1, 1, 1, 1],
                      [1, 0, 1, 1, 1, 1],
                      [1, 1, 0, 1, 1, 1],
                      [1, 1, 1, 0, 1, 1],
                      [1, 1, 1, 1, 0, 1],
                      [1, 1, 1, 1, 1, 0]], dtype=bool)

        # With a window_length of N, 0's should be "sticky" for the (N - 1)
        # days after the 0 in the base data.

        # Note that, the way ``self.run_graph`` works, we compute the same
        # number of output rows for all inputs, so we only get the last 4
        # outputs for expected_3 even though we have enought input data to
        # compute 5 rows.
        expected_3 = array([[0, 0, 0, 1, 1, 1],
                            [1, 0, 0, 0, 1, 1],
                            [1, 1, 0, 0, 0, 1],
                            [1, 1, 1, 0, 0, 0]], dtype=bool)

        expected_4 = array([[0, 0, 0, 1, 1, 1],
                            [0, 0, 0, 0, 1, 1],
                            [1, 0, 0, 0, 0, 1],
                            [1, 1, 0, 0, 0, 0]], dtype=bool)

        class Input(Filter):
            inputs = ()
            window_length = 0

        results = self.run_graph(
            TermGraph({
                '3': All(inputs=[Input()], window_length=3),
                '4': All(inputs=[Input()], window_length=4),
            }),
            initial_workspace={Input(): data},
            mask=self.build_mask(ones(shape=data.shape)),
        )

        check_arrays(results['3'], expected_3)
        check_arrays(results['4'], expected_4)
Ejemplo n.º 57
0
    def test_update_labels(self):
        data = np.array(
            [
                ["aaa", "bbb", "ccc"],
                ["ddd", "eee", "fff"],
                ["ggg", "hhh", "iii"],
                ["jjj", "kkk", "lll"],
                ["mmm", "nnn", "ooo"],
            ]
        )
        label_array = LabelArray(data, missing_value="")

        adj_array = AdjustedArray(
            data=label_array,
            adjustments={4: [ObjectOverwrite(2, 3, 0, 0, "ppp")]},
            missing_value="",
        )

        expected_data = np.array(
            [
                ["aaa-foo", "bbb-foo", "ccc-foo"],
                ["ddd-foo", "eee-foo", "fff-foo"],
                ["ggg-foo", "hhh-foo", "iii-foo"],
                ["jjj-foo", "kkk-foo", "lll-foo"],
                ["mmm-foo", "nnn-foo", "ooo-foo"],
            ]
        )
        expected_label_array = LabelArray(expected_data, missing_value="")

        expected_adj_array = AdjustedArray(
            data=expected_label_array,
            adjustments={4: [ObjectOverwrite(2, 3, 0, 0, "ppp-foo")]},
            missing_value="",
        )

        adj_array.update_labels(lambda x: x + "-foo")

        # Check that the mapped AdjustedArray has the expected baseline
        # values and adjustment values.
        check_arrays(adj_array.data, expected_adj_array.data)
        assert adj_array.adjustments == expected_adj_array.adjustments