def test_rf_local_is_in(self):
        from pyspark.sql.functions import lit, array, col
        from pyspark.sql import Row

        nd = 5
        t = Tile(np.array([[1, 3, 4], [nd, 0, 3]]),
                 CellType.uint8().with_no_data_value(nd))

        # Build one membership-test column per input style; the
        # rf_convert_cell_type wrapper works around issue #188.
        df = self.spark.createDataFrame([Row(t=t)])
        df = df.withColumn('a', array(lit(3), lit(4)))
        df = df.withColumn(
            'in2',
            rf_convert_cell_type(rf_local_is_in(col('t'), array(lit(0), lit(4))), 'uint8'))
        df = df.withColumn(
            'in3',
            rf_convert_cell_type(rf_local_is_in('t', 'a'), 'uint8'))
        df = df.withColumn(
            'in4',
            rf_convert_cell_type(rf_local_is_in('t', array(lit(0), lit(4), lit(3))), 'uint8'))
        df = df.withColumn(
            'in_list',
            rf_convert_cell_type(rf_local_is_in(col('t'), [4, 1]), 'uint8'))

        row = df.first()
        self.assertEqual(row['in2'].cells.sum(), 2)
        # Cross-check against numpy's own membership test.
        assert_equal(row['in2'].cells, np.isin(t.cells, np.array([0, 4])))
        self.assertEqual(row['in3'].cells.sum(), 3)
        self.assertEqual(row['in4'].cells.sum(), 4)
        self.assertEqual(
            row['in_list'].cells.sum(), 2,
            "Tile value {} should contain two 1s as: [[1, 0, 1],[0, 0, 0]]".
            format(row['in_list'].cells))
    def test_mask(self):
        from pyspark.sql import Row
        from pyrasterframes.rf_types import Tile, CellType

        np.random.seed(999)
        # Keep 0 out of the random range: 0 is the nodata value for the
        # `data_tile` cell type below, so a 0 cell would already read as nodata.
        cell_values = np.random.randint(1, 10, (5, 5), dtype='int8')
        random_mask = np.random.rand(5, 5) > 0.7
        ma = np.ma.array(cell_values, mask=random_mask)

        expected_data_values = ma.compressed().size
        expected_no_data_values = ma.size - expected_data_values
        # Guard against a seed that produces an all-masked or all-unmasked tile.
        self.assertTrue(expected_data_values > 0,
                        "Make sure random seed is cooperative ")
        self.assertTrue(expected_no_data_values > 0,
                        "Make sure random seed is cooperative ")

        data_tile = Tile(np.ones(ma.shape, ma.dtype), CellType.uint8())

        df = self.spark.createDataFrame([Row(t=data_tile, m=Tile(ma))])
        df = df.withColumn('masked_t', rf_mask('t', 'm'))

        result = df.select(rf_data_cells('masked_t')).first()[0]
        self.assertEqual(
            result, expected_data_values,
            f"Masked tile should have {expected_data_values} data values but found: {df.select('masked_t').first()[0].cells}."
            f"Original data: {data_tile.cells}"
            f"Masked by {ma}")

        nd_result = df.select(rf_no_data_cells('masked_t')).first()[0]
        self.assertEqual(nd_result, expected_no_data_values)

        # Round-trip check: the deserialized tile keeps the same masked cells.
        self.assertEqual(
            df.select('masked_t').first()[0].cells.compressed().size,
            expected_data_values)
    def test_rf_where(self):
        # Random condition and branch tiles; expected result comes from np.where.
        cond = Tile(np.random.binomial(1, 0.35, (10, 10)), CellType.uint8())
        x = Tile(np.random.randint(-20, 10, (10, 10)), CellType.int8())
        y = Tile(np.random.randint(0, 30, (10, 10)), CellType.int8())

        df = self.spark.createDataFrame([Row(cond=cond, x=x, y=y)])
        selected = df.select(rf_where('cond', 'x', 'y')).first()[0]
        expected = np.where(cond.cells, x.cells, y.cells)
        assert_equal(selected.cells, expected)
    def test_mask_by_values(self):
        """rf_mask_by_values should mark as nodata every cell of `t` whose
        corresponding cell in the mask tile `m` appears in the given value list."""
        tile = Tile(np.random.randint(1, 100, (5, 5)), CellType.uint8())
        # Mask tile holds the values 1..25 laid out row-major.
        mask_tile = Tile(np.array(range(1, 26), 'uint8').reshape(5, 5))
        expected_diag_nd = Tile(np.ma.masked_array(tile.cells, mask=np.eye(5)))

        # NOTE(review): the main diagonal of mask_tile holds 1, 7, 13, 19, 25,
        # not [0, 6, 12, 18, 24] — confirm the value list really targets the
        # diagonal cells that `expected_diag_nd` masks.
        df = self.spark.createDataFrame([Row(t=tile, m=mask_tile)]) \
            .select(rf_mask_by_values('t', 'm', [0, 6, 12, 18, 24]))  # values on the diagonal
        result0 = df.first()
        # assertEqual reports both operands on failure, unlike assertTrue(a == b).
        self.assertEqual(result0[0], expected_diag_nd)
    def test_rf_local_data_and_no_data(self):
        from pyspark.sql import Row
        from pyrasterframes.rf_types import Tile

        nd = 5
        t = Tile(np.array([[1, 3, 4], [nd, 0, 3]]),
                 CellType.uint8().with_no_data_value(nd))

        # The rf_convert_cell_type wrapper works around issue #188.
        df = self.spark.createDataFrame([Row(t=t)])
        df = df.withColumn('lnd', rf_convert_cell_type(rf_local_no_data('t'), 'uint8'))
        df = df.withColumn('ld', rf_convert_cell_type(rf_local_data('t'), 'uint8'))

        row = df.first()
        # The local-no-data tile should equal the source tile's mask...
        assert_equal(row['lnd'].cells, t.cells.mask)
        # ...and the local-data tile is its complement.
        assert_equal(row['ld'].cells, np.invert(t.cells.mask))
    def test_rf_interpret_cell_type_as(self):
        from pyspark.sql import Row
        from pyrasterframes.rf_types import Tile

        source_tile = Tile(np.array([[1, 3, 4], [5, 0, 3]]),
                           CellType.uint8().with_no_data_value(5))
        df = self.spark.createDataFrame([Row(t=source_tile)])
        # Reinterpret the cells so that 3 (rather than 5) is the nodata sentinel.
        df = df.withColumn('tile', rf_interpret_cell_type_as('t', 'uint8ud3'))

        # In the original column `t`, both 3-valued cells are data cells.
        threes = df.select(
            rf_tile_sum(rf_local_equal('t', lit(3))).alias('threes')
        ).first()['threes']
        self.assertEqual(threes, 2)

        # 5 is the nodata value for `t`, so no cell compares equal to it.
        fives = df.select(
            rf_tile_sum(rf_local_equal('t', lit(5))).alias('fives')
        ).first()['fives']
        self.assertEqual(fives, 0)