def test_list_tables(self):
    """Check ``spark.catalog.listTables`` for the default and a named database.

    The test creates a database ``some_db``, one temp view, two tables via
    SQL and one table via ``Catalog.createTable``, then verifies that:

    * both listings are empty before anything is created;
    * ``listTables()`` and ``listTables("default")`` return the same entries;
    * the temp view shows up in the listing of every database queried here;
    * listing a database that does not exist raises ``AnalysisException``.
    """
    from pyspark.sql.catalog import Table

    spark = self.spark
    with self.database("some_db"):
        spark.sql("CREATE DATABASE some_db")
        with self.table("tab1", "some_db.tab2", "tab3_via_catalog"), \
                self.tempView("temp_tab"):
            # Nothing has been created yet, so both listings start empty.
            self.assertEqual(spark.catalog.listTables(), [])
            self.assertEqual(spark.catalog.listTables("some_db"), [])

            spark.createDataFrame([(1, 1)]).createOrReplaceTempView("temp_tab")
            spark.sql("CREATE TABLE tab1 (name STRING, age INT) USING parquet")
            spark.sql("CREATE TABLE some_db.tab2 (name STRING, age INT) USING parquet")
            schema = StructType([StructField("a", IntegerType(), True)])
            description = "this a table created via Catalog.createTable()"
            spark.catalog.createTable(
                "tab3_via_catalog", schema=schema, description=description)

            # Sort by name so the positional assertions below are deterministic.
            tables = sorted(spark.catalog.listTables(), key=lambda t: t.name)
            tablesDefault = sorted(
                spark.catalog.listTables("default"), key=lambda t: t.name)
            tablesSomeDb = sorted(
                spark.catalog.listTables("some_db"), key=lambda t: t.name)

            self.assertEqual(tables, tablesDefault)
            self.assertEqual(len(tables), 3)
            self.assertEqual(len(tablesSomeDb), 2)

            self.assertEqual(tables[0], Table(
                name="tab1",
                database="default",
                description=None,
                tableType="MANAGED",
                isTemporary=False))
            self.assertEqual(tables[1], Table(
                name="tab3_via_catalog",
                database="default",
                description=description,
                tableType="MANAGED",
                isTemporary=False))
            self.assertEqual(tables[2], Table(
                name="temp_tab",
                database=None,
                description=None,
                tableType="TEMPORARY",
                isTemporary=True))

            self.assertEqual(tablesSomeDb[0], Table(
                name="tab2",
                database="some_db",
                description=None,
                tableType="MANAGED",
                isTemporary=False))
            # The temp view is expected in the some_db listing as well.
            self.assertEqual(tablesSomeDb[1], Table(
                name="temp_tab",
                database=None,
                description=None,
                tableType="TEMPORARY",
                isTemporary=True))

            # assertRaisesRegex replaces the removed assertRaisesRegexp alias.
            with self.assertRaisesRegex(AnalysisException, "does_not_exist"):
                spark.catalog.listTables("does_not_exist")
def test_scrape_all_tables(self) -> None:
    """Scraping a list of two tables is expected to yield two results."""
    # Both tables share the same schema, type and flags; only the name differs.
    shared_fields = dict(
        database="test_schema1",
        description=None,
        tableType="delta",
        isTemporary=False,
    )
    tables = [
        Table(name=table_name, **shared_fields)
        for table_name in ("test_table1", "test_table3")
    ]

    scraped = self.dExtractor.scrape_all_tables(tables)

    self.assertEqual(2, len(scraped))
def test_scrape_tables(self) -> None:
    """Scrape ``test_schema1.test_table1`` and compare it to the expected metadata.

    The expected metadata has two columns, ``a`` (string) and ``b`` (int),
    in that sort order. The scrape must also return a non-None
    ``table_detail``.
    """
    expected = ScrapedTableMetadata(schema="test_schema1", table="test_table1")
    expected.set_columns([
        ScrapedColumnMetadata(name="a", description=None, data_type="string", sort_order=0),
        ScrapedColumnMetadata(name="b", description=None, data_type="int", sort_order=1),
    ])

    actual = self.dExtractor.scrape_table(
        Table(
            name="test_table1",
            database="test_schema1",
            description=None,
            tableType="delta",
            isTemporary=False,
        )
    )

    # Guard clause: a None result fails the test here; the return only
    # serves to narrow the type of `actual` for the checks below.
    if actual is None:
        self.assertIsNotNone(actual)
        return

    self.assertEqual(expected.schema, actual.schema)
    self.assertEqual(expected.table, actual.table)
    self.assertEqual(expected.columns, actual.columns)
    self.assertEqual(expected.failed_to_scrape, actual.failed_to_scrape)
    self.assertEqual(expected.is_view, actual.is_view)
    self.assertIsNotNone(actual.table_detail)
def test_table_does_not_exist(self) -> None:
    """Scraping ``test_schema1.test_table5`` is expected to return ``None``."""
    missing_table = Table(
        name="test_table5",
        database="test_schema1",
        description=None,
        tableType="delta",
        isTemporary=False,
    )
    self.assertIsNone(self.dExtractor.scrape_table(missing_table))
def test_create_last_updated(self) -> None:
    """A last-updated record must be produced for a scraped ``test_table1``.

    If the scrape returns a falsy value, no record is built and the final
    assertion fails.
    """
    extractor = self.dExtractor
    table = Table("test_table1", "test_schema1", None, "delta", False)
    scraped_table = extractor.scrape_table(table)

    # Only build the record when the scrape produced something truthy.
    actual_last_updated = (
        extractor.create_table_last_updated(scraped_table)
        if scraped_table
        else None
    )

    self.assertIsNotNone(actual_last_updated)
def makeTable(name, database, description, tableType, isTemporary):
    """Build a ``Table`` from a single database name.

    ``catalog`` is always ``None``. ``namespace`` is ``[database]``, or
    ``None`` when ``database`` is ``None``. All other arguments are passed
    through unchanged.
    """
    if database is None:
        namespace = None
    else:
        namespace = [database]

    return Table(
        name=name,
        catalog=None,
        namespace=namespace,
        description=description,
        tableType=tableType,
        isTemporary=isTemporary,
    )