Example #1
0
    def test_create_table_with_existing_schema(self) -> None:
        """Tables can be created from a DataFrame schema (StructType or
        list of StructFields) and extended with additional columns."""
        source_df = self.spark.createDataFrame(
            [('a', 1), ('b', 2), ('c', 3)], ["key", "value"])

        # Build a table from the full StructType, then append one extra column.
        table_from_schema = (
            DeltaTable.create(self.spark).tableName("test")
            .addColumns(source_df.schema)
            .addColumn("value2", dataType="int")
            .partitionedBy(["value2", "value"])
            .execute()
        )
        self.__verify_table_schema("test",
                                   table_from_schema.toDF().schema,
                                   ["key", "value", "value2"],
                                   [StringType(), LongType(), IntegerType()],
                                   nullables={"key", "value", "value2"},
                                   partitioningColumns=["value", "value2"])

        # Same expectations when addColumns receives a list of StructFields.
        table_from_fields = (
            DeltaTable.create(self.spark).tableName("test2")
            .addColumns(source_df.schema.fields)
            .addColumn("value2", dataType="int")
            .partitionedBy("value2", "value")
            .execute()
        )
        self.__verify_table_schema("test2",
                                   table_from_fields.toDF().schema,
                                   ["key", "value", "value2"],
                                   [StringType(), LongType(), IntegerType()],
                                   nullables={"key", "value", "value2"},
                                   partitioningColumns=["value", "value2"])
Example #2
0
    def test_create_replace_table_with_no_spark_session_passed(self) -> None:
        """All four builder entry points work without an explicit SparkSession."""
        # Plain create.
        table = (DeltaTable.create().tableName("test")
                 .addColumn("value", dataType="int").execute())
        self.__verify_table_schema("test",
                                   table.toDF().schema, ["value"],
                                   [IntegerType()],
                                   nullables={"value"})

        # createIfNotExists on an existing table is a no-op: the original
        # "value" column remains, "value2" is never added.
        table = (DeltaTable.createIfNotExists().tableName("test")
                 .addColumn("value2", dataType="int").execute())
        self.__verify_table_schema("test",
                                   table.toDF().schema, ["value"],
                                   [IntegerType()],
                                   nullables={"value"})

        # replace swaps in an entirely new schema.
        table = (DeltaTable.replace().tableName("test")
                 .addColumn("key", dataType="int").execute())
        self.__verify_table_schema("test",
                                   table.toDF().schema, ["key"],
                                   [IntegerType()],
                                   nullables={"key"})

        # createOrReplace also replaces when the table already exists.
        table = (DeltaTable.createOrReplace().tableName("test")
                 .addColumn("col1", dataType="int").execute())

        self.__verify_table_schema("test",
                                   table.toDF().schema, ["col1"],
                                   [IntegerType()],
                                   nullables={"col1"})
Example #3
0
 def __create_table(self, ifNotExists, tableName=None, location=None):
     """Build a DeltaTable via create()/createIfNotExists(), optionally
     attaching a table name and/or a storage location."""
     # Pick the builder factory first, then apply it — behavior is
     # identical to choosing between the two full calls inline.
     factory = DeltaTable.createIfNotExists if ifNotExists else DeltaTable.create
     builder = factory(self.spark)
     if tableName:
         builder = builder.tableName(tableName)
     if location:
         builder = builder.location(location)
     return self.__build_delta_table(builder)
Example #4
0
 def test_verify_paritionedBy_compatibility(self):
     """Calling partitionedBy directly on the wrapped Java builder
     (via ``_jbuilder``) still produces the expected partitioned table.
     NOTE: the typo in the test name is kept — it is the public test id."""
     builder = (
         DeltaTable.create(self.spark).tableName("testTable")
         .addColumn("col1", "int", comment="foo", nullable=False)
         .addColumn("col2", IntegerType(), generatedAlwaysAs="col1 + 10")
         .property("foo", "bar")
         .comment("comment")
     )
     # Bypass the Python wrapper and partition through the JVM builder.
     builder._jbuilder = builder._jbuilder.partitionedBy(
         _to_seq(self.spark._sc, ["col1"]))
     table = builder.execute()
     self.__verify_table_schema(
         "testTable",
         table.toDF().schema, ["col1", "col2"],
         [IntegerType(), IntegerType()],
         nullables={"col2"},
         comments={"col1": "foo"},
         properties={"foo": "bar"},
         partitioningColumns=["col1"],
         tblComment="comment")
Example #5
0
    def test_delta_table_builder_with_bad_args(self) -> None:
        """Every builder method rejects wrongly-typed arguments with
        TypeError; an unparsable datatype string raises ParseException."""
        builder = DeltaTable.create(self.spark)

        # Wrongly-typed arguments — each call must raise TypeError.
        for bad_call in (
            lambda: builder.tableName(1),          # bad table name
            lambda: builder.location(1),           # bad location
            lambda: builder.comment(1),            # bad table comment
            lambda: builder.addColumn(1, "int"),   # bad column name
            lambda: builder.addColumn("a", 1),     # bad datatype object
        ):
            with self.assertRaises(TypeError):
                bad_call()

        # A column datatype string that can't be parsed.
        with self.assertRaises(ParseException):
            builder.addColumn("a", "1")

        # Remaining wrongly-typed arguments — also TypeError.
        for bad_call in (
            lambda: builder.addColumn("a", "int", comment=1),
            lambda: builder.addColumn("a", "int", generatedAlwaysAs=1),
            lambda: builder.addColumn("a", "int", nullable=1),
            lambda: builder.addColumns(1),                 # bad schema
            lambda: builder.addColumns([StructField("1", IntegerType()), 1]),
            lambda: builder.partitionedBy(1),              # bad partition col
            lambda: builder.partitionedBy(1, "1"),
            lambda: builder.partitionedBy([1]),
            lambda: builder.property(1, "1"),              # bad property key
            lambda: builder.property("1", 1),              # bad property value
        ):
            with self.assertRaises(TypeError):
                bad_call()