def test_nested_udt_in_df(self):
    """Check type inference for UDT points nested inside a list and a dict.

    Each case builds the expected row schema field by field and compares it
    with what ``_infer_type`` returns for a matching Python tuple.
    """
    # Case 1: tuple of (int, list of points) -> fields "_1" and "_2".
    array_schema = DataTypes.ROW()
    array_schema = array_schema.add("_1", DataTypes.BIGINT())
    array_schema = array_schema.add("_2", DataTypes.ARRAY(PythonOnlyUDT()))
    list_sample = (1, [PythonOnlyPoint(1.0, 2.0)])
    self.assertEqual(array_schema, _infer_type(list_sample))

    # Case 2: tuple of (int, dict of int -> point); the expected map key
    # type is built as BIGINT(False).
    map_schema = DataTypes.ROW()
    map_schema = map_schema.add("_1", DataTypes.BIGINT())
    map_schema = map_schema.add(
        "_2", DataTypes.MAP(DataTypes.BIGINT(False), PythonOnlyUDT()))
    dict_sample = (1, {1: PythonOnlyPoint(1, 2.0)})
    self.assertEqual(map_schema, _infer_type(dict_sample))
def test_udt(self):
    """Check inference and verification for both point/UDT pairs.

    For each pair: a point instance must be inferred as its UDT, a verifier
    built from the UDT must accept a point instance, and the same verifier
    must raise ``ValueError`` for a plain list of floats.
    """
    point_udt_pairs = (
        (ExamplePoint, ExamplePointUDT),
        (PythonOnlyPoint, PythonOnlyUDT),
    )
    for point_cls, udt_cls in point_udt_pairs:
        point = point_cls(1.0, 2.0)
        self.assertEqual(_infer_type(point), udt_cls())

        # A genuine point instance passes verification without raising.
        accept = _create_type_verifier(udt_cls())
        accept(point_cls(1.0, 2.0))

        # A bare list is not a point, so verification is expected to fail.
        with self.assertRaises(ValueError):
            _create_type_verifier(udt_cls())([1.0, 2.0])
def test_infer_bigint_type(self):
    """Check that Python ints are inferred as BIGINT.

    Covers an int held in a ``Row`` field as well as bare ints, including
    values around 2**31 and one (2**71) that does not fit in 64 bits.
    """
    rows = [Row(f1='a', f2=100000000000000)]
    inferred_schema = _infer_schema_from_data(rows)
    # Field index 1 is ``f2``, the integer column.
    self.assertEqual(DataTypes.BIGINT(), inferred_schema.fields[1].data_type)

    samples = (1, 2 ** 10, 2 ** 20, 2 ** 31 - 1, 2 ** 31, 2 ** 61, 2 ** 71)
    for sample in samples:
        self.assertEqual(DataTypes.BIGINT(), _infer_type(sample))
def test_infer_bigint_type(self):
    """Verify BIGINT inference for integers of increasing magnitude.

    The first check goes through schema inference on a ``Row``; the rest
    call ``_infer_type`` directly on ints from 1 up to 2**71.
    """
    def expect_bigint(actual_type):
        # Build a fresh BIGINT for every comparison, as each check needs one.
        self.assertEqual(DataTypes.BIGINT(), actual_type)

    schema = _infer_schema_from_data([Row(f1='a', f2=100000000000000)])
    second_field = schema.fields[1]
    expect_bigint(second_field.data_type)

    expect_bigint(_infer_type(1))
    expect_bigint(_infer_type(2**10))
    expect_bigint(_infer_type(2**20))
    # Both sides of the signed 32-bit limit.
    expect_bigint(_infer_type(2**31 - 1))
    expect_bigint(_infer_type(2**31))
    expect_bigint(_infer_type(2**61))
    # Larger than any 64-bit integer.
    expect_bigint(_infer_type(2**71))
def assert_collect_success(typecode, value, element_type):
    """Assert the element type inferred for a one-item ``array.array``.

    ``self`` is not a parameter here; it is resolved from the enclosing
    scope, so this helper only works where a test case is in scope.

    :param typecode: ``array.array`` type code used to build the sample.
    :param value: the single element stored in the sample array.
    :param element_type: expected ``str()`` of the inferred element type.
    """
    sample = array.array(typecode, [value])
    inferred = _infer_type(sample)
    self.assertEqual(element_type, str(inferred.element_type))
def assert_collect_success(typecode, value, element_type):
    """Check how ``_infer_type`` types the elements of a typed array.

    Builds ``array.array(typecode, [value])``, infers its type, and compares
    the string form of the inferred element type with ``element_type``.
    Relies on ``self`` being available from the surrounding scope.
    """
    inferred_element = _infer_type(array.array(typecode, [value])).element_type
    actual = str(inferred_element)
    self.assertEqual(element_type, actual)