def test_create_table_with_existing_schema(self) -> None:
    """Create tables seeded from an existing DataFrame schema plus an extra column."""
    df = self.spark.createDataFrame([('a', 1), ('b', 2), ('c', 3)], ["key", "value"])

    # Seed the builder with a whole StructType.
    table = (DeltaTable.create(self.spark).tableName("test")
             .addColumns(df.schema)
             .addColumn("value2", dataType="int")
             .partitionedBy(["value2", "value"])
             .execute())
    self.__verify_table_schema(
        "test", table.toDF().schema,
        ["key", "value", "value2"],
        [StringType(), LongType(), IntegerType()],
        nullables={"key", "value", "value2"},
        partitioningColumns=["value", "value2"])

    # Seed the builder with a list of StructFields instead of a StructType.
    table2 = (DeltaTable.create(self.spark).tableName("test2")
              .addColumns(df.schema.fields)
              .addColumn("value2", dataType="int")
              .partitionedBy("value2", "value")
              .execute())
    self.__verify_table_schema(
        "test2", table2.toDF().schema,
        ["key", "value", "value2"],
        [StringType(), LongType(), IntegerType()],
        nullables={"key", "value", "value2"},
        partitioningColumns=["value", "value2"])
def test_create_replace_table_with_no_spark_session_passed(self) -> None:
    """The builder factories work without an explicit SparkSession argument."""
    def check_single_int_column(table, col):
        # Every scenario below leaves "test" with exactly one nullable int column.
        self.__verify_table_schema("test", table.toDF().schema, [col],
                                   [IntegerType()], nullables={col})

    # Plain create.
    created = (DeltaTable.create().tableName("test")
               .addColumn("value", dataType="int").execute())
    check_single_int_column(created, "value")

    # createIfNotExists on an existing table is a no-op, so the schema is unchanged.
    ignored = (DeltaTable.createIfNotExists().tableName("test")
               .addColumn("value2", dataType="int").execute())
    check_single_int_column(ignored, "value")

    # replace swaps in the new schema.
    replaced = (DeltaTable.replace().tableName("test")
                .addColumn("key", dataType="int").execute())
    check_single_int_column(replaced, "key")

    # createOrReplace also replaces an existing table.
    recreated = (DeltaTable.createOrReplace().tableName("test")
                 .addColumn("col1", dataType="int").execute())
    check_single_int_column(recreated, "col1")
def __create_table(self, ifNotExists, tableName=None, location=None):
    """Build a Delta table, optionally tolerating pre-existing tables.

    Chooses ``createIfNotExists`` vs ``create`` based on *ifNotExists*, applies
    the optional name/location, and delegates to ``__build_delta_table``.
    """
    if ifNotExists:
        builder = DeltaTable.createIfNotExists(self.spark)
    else:
        builder = DeltaTable.create(self.spark)
    # Truthiness (not `is not None`) matches the original contract: empty
    # strings are treated the same as "not provided".
    if tableName:
        builder = builder.tableName(tableName)
    if location:
        builder = builder.location(location)
    return self.__build_delta_table(builder)
def test_verify_paritionedBy_compatibility(self):
    """Check the JVM-side Seq overload of partitionedBy still works.

    NOTE(review): "paritionedBy" in the method name is a typo for
    "partitionedBy"; kept as-is so existing test selections keep matching.
    """
    builder = (DeltaTable.create(self.spark).tableName("testTable")
               .addColumn("col1", "int", comment="foo", nullable=False)
               .addColumn("col2", IntegerType(), generatedAlwaysAs="col1 + 10")
               .property("foo", "bar")
               .comment("comment"))
    # Bypass the Python wrapper and call the JVM builder directly with a Seq.
    builder._jbuilder = builder._jbuilder.partitionedBy(
        _to_seq(self.spark._sc, ["col1"]))
    deltaTable = builder.execute()
    self.__verify_table_schema(
        "testTable", deltaTable.toDF().schema,
        ["col1", "col2"],
        [IntegerType(), IntegerType()],
        nullables={"col2"},
        comments={"col1": "foo"},
        properties={"foo": "bar"},
        partitioningColumns=["col1"],
        tblComment="comment")
def test_delta_table_builder_with_bad_args(self) -> None:
    """Every builder setter rejects arguments of the wrong type."""
    builder = DeltaTable.create(self.spark)

    # Each call passes a wrongly-typed argument and must raise TypeError.
    bad_typed_calls = [
        lambda: builder.tableName(1),  # type: ignore[arg-type]
        lambda: builder.location(1),  # type: ignore[arg-type]
        lambda: builder.comment(1),  # type: ignore[arg-type]
        lambda: builder.addColumn(1, "int"),  # type: ignore[arg-type]
        lambda: builder.addColumn("a", 1),  # type: ignore[arg-type]
        lambda: builder.addColumn("a", "int", comment=1),  # type: ignore[arg-type]
        lambda: builder.addColumn("a", "int", generatedAlwaysAs=1),  # type: ignore[arg-type]
        lambda: builder.addColumn("a", "int", nullable=1),  # type: ignore[arg-type]
        lambda: builder.addColumns(1),  # type: ignore[arg-type]
        lambda: builder.addColumns(
            [StructField("1", IntegerType()), 1]),  # type: ignore[list-item]
        lambda: builder.partitionedBy(1),  # type: ignore[call-overload]
        lambda: builder.partitionedBy(1, "1"),  # type: ignore[call-overload]
        lambda: builder.partitionedBy([1]),  # type: ignore[list-item]
        lambda: builder.property(1, "1"),  # type: ignore[arg-type]
        lambda: builder.property("1", 1),  # type: ignore[arg-type]
    ]
    for bad_call in bad_typed_calls:
        with self.assertRaises(TypeError):
            bad_call()

    # A string column type must be parseable as a SQL data type.
    with self.assertRaises(ParseException):
        builder.addColumn("a", "1")