def returnType(self):
    """Return the resolved return type of this UDF, validating it for Pandas UDFs.

    The declared ``_returnType`` is used directly when it is already a
    ``DataType``; otherwise it is handed to ``_parse_datatype_string``. The
    resolved value is cached in ``_returnType_placeholder``.

    Raises:
        NotImplementedError: if the Arrow conversion helper rejects the
            resolved type (signalled by ``TypeError``) for a scalar or
            grouped map Pandas UDF.
        TypeError: if a grouped map Pandas UDF declares a return type that
            is not a ``StructType``.
    """
    # Resolution is deferred to first access so that it happens after
    # SparkContext is initialized: ``_parse_datatype_string`` accesses the JVM
    # to parse a DDL formatted string.
    if self._returnType_placeholder is None:
        declared = self._returnType
        self._returnType_placeholder = (
            declared if isinstance(declared, DataType)
            else _parse_datatype_string(declared))

    resolved = self._returnType_placeholder
    eval_type = self.evalType

    if eval_type == PythonEvalType.SQL_SCALAR_PANDAS_UDF:
        # The conversion result is discarded; the call only probes support.
        try:
            to_arrow_type(resolved)
        except TypeError:
            raise NotImplementedError(
                "Invalid returnType with scalar Pandas UDFs: %s is not supported"
                % str(resolved))
    elif eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF:
        # Grouped map UDFs must declare a struct; check that before probing Arrow.
        if not isinstance(resolved, StructType):
            raise TypeError(
                "Invalid returnType for grouped map Pandas UDFs: "
                "returnType must be a StructType.")
        try:
            to_arrow_schema(resolved)
        except TypeError:
            raise NotImplementedError(
                "Invalid returnType with grouped map Pandas UDFs: %s is not supported"
                % str(resolved))

    return resolved
def returnType(self):
    """Return the resolved return type of this UDF, validating it for Pandas UDFs.

    The declared ``_returnType`` is used directly when it is already a
    ``DataType``; otherwise it is handed to ``_parse_datatype_string``. The
    resolved value is cached in ``_returnType_placeholder``.

    Raises:
        NotImplementedError: if the Arrow conversion helper rejects the
            resolved type (signalled by ``TypeError``) for a scalar, grouped
            map, or grouped aggregate Pandas UDF.
        TypeError: if a grouped map Pandas UDF declares a return type that
            is not a ``StructType``.
    """
    # Resolution is deferred to first access so that it happens after
    # SparkContext is initialized: ``_parse_datatype_string`` accesses the JVM
    # to parse a DDL formatted string.
    if self._returnType_placeholder is None:
        declared = self._returnType
        self._returnType_placeholder = (
            declared if isinstance(declared, DataType)
            else _parse_datatype_string(declared))

    resolved = self._returnType_placeholder
    eval_type = self.evalType

    if eval_type == PythonEvalType.SQL_SCALAR_PANDAS_UDF:
        # The conversion result is discarded; the call only probes support.
        try:
            to_arrow_type(resolved)
        except TypeError:
            raise NotImplementedError(
                "Invalid returnType with scalar Pandas UDFs: %s is not supported"
                % str(resolved))
    elif eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF:
        # Grouped map UDFs must declare a struct; check that before probing Arrow.
        if not isinstance(resolved, StructType):
            raise TypeError(
                "Invalid returnType for grouped map Pandas UDFs: "
                "returnType must be a StructType.")
        try:
            to_arrow_schema(resolved)
        except TypeError:
            raise NotImplementedError(
                "Invalid returnType with grouped map Pandas UDFs: %s is not supported"
                % str(resolved))
    elif eval_type == PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF:
        # Same probe as the scalar case, reported under its own label.
        try:
            to_arrow_type(resolved)
        except TypeError:
            raise NotImplementedError(
                "Invalid returnType with grouped aggregate Pandas UDFs: %s is not supported"
                % str(resolved))

    return resolved
def test_schema_conversion_roundtrip(self):
    """Check that ``self.schema`` survives a Spark -> Arrow -> Spark conversion."""
    # Imported inside the test rather than at module level, as in the original.
    # NOTE(review): presumably so the module still loads when the Arrow
    # dependency is unavailable -- confirm.
    from pyspark.sql.types import from_arrow_schema, to_arrow_schema

    arrow_schema = to_arrow_schema(self.schema)
    schema_rt = from_arrow_schema(arrow_schema)
    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias, which
    # newer Python versions no longer provide.
    self.assertEqual(self.schema, schema_rt)
def test_schema_conversion_roundtrip(self):
    """Check that ``self.schema`` survives a Spark -> Arrow -> Spark conversion."""
    # Imported inside the test rather than at module level, as in the original.
    # NOTE(review): presumably so the module still loads when the Arrow
    # dependency is unavailable -- confirm.
    from pyspark.sql.types import from_arrow_schema, to_arrow_schema

    arrow_schema = to_arrow_schema(self.schema)
    schema_rt = from_arrow_schema(arrow_schema)
    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias, which
    # newer Python versions no longer provide.
    self.assertEqual(self.schema, schema_rt)