def test_create_index_cols(self):
    """Index built on the explicit columns 'id' and 'str' is reported as existing."""
    ctx = QueryContext(self.spark)
    path = os.path.join(self.dirpath, 'table.parquet')

    # Write a ten-row Parquet table with an extra constant string column.
    frame = self.spark.range(0, 10).withColumn('str', lit('abc'))
    frame.write.parquet(path)

    create_cmd = ctx.index.create.indexBy('id', 'str')
    create_cmd.parquet(path)

    index_exists = ctx.index.exists.parquet(path)
    self.assertTrue(index_exists)
def test_manager_set_options(self):
    """``options`` is expected to override earlier keys and store values as strings."""
    ctx = QueryContext(self.spark)
    bulk_options = {'a': '2', 'b': 3, 'c': True}
    expected = {'a': '2', 'b': '3', 'c': 'True'}

    # 'a' is first set through ``option`` and then overridden by ``options``.
    manager = ctx.index.option('a', '1').options(bulk_options)

    self.assertEqual(manager._options, expected)
def test_create_query_index_empty_table(self):
    """Indexing and querying a table with no rows yields an empty result."""
    ctx = QueryContext(self.spark)
    path = os.path.join(self.dirpath, 'table.parquet')

    # 'id < 0' filters out every row of range(0, 10), so the table is empty.
    empty_frame = (
        self.spark.range(0, 10)
        .filter('id < 0')
        .withColumn('str', lit('abc'))
    )
    empty_frame.write.parquet(path)

    ctx.index.create.indexByAll().parquet(path)

    rows = ctx.index.parquet(path).filter('id = 3').collect()
    self.assertEqual(rows, [])
def test_create_index_mode(self):
    """Index creation with mode 'error' followed by 'overwrite' leaves an index in place."""
    ctx = QueryContext(self.spark)
    path = os.path.join(self.dirpath, 'table.parquet')

    frame = self.spark.range(0, 10).withColumn('str', lit('abc'))
    frame.write.parquet(path)

    # First creation uses 'error', the second replaces it with 'overwrite'.
    for save_mode in ('error', 'overwrite'):
        ctx.index.create.mode(save_mode).indexByAll().parquet(path)

    self.assertTrue(ctx.index.exists.parquet(path))
def test_manager_set_option(self):
    """Chained ``option`` calls are expected to store every value as a string."""
    ctx = QueryContext(self.spark)
    pairs = (('key1', '1'), ('key2', 2), ('key3', True))

    # Apply the options one after another, as a fluent chain would.
    manager = ctx.index
    for key, value in pairs:
        manager = manager.option(key, value)

    expected = {'key1': '1', 'key2': '2', 'key3': 'True'}
    self.assertEqual(manager._options, expected)
def test_create_command_wrong_mode(self):
    """Running a create command whose mode is None must fail with 'Unsupported mode None'."""
    context = QueryContext(self.spark)
    cmd = context.index.create.mode(None)

    # Only the message is checked; the exception class is intentionally left
    # broad. assertRaises fails the test if no exception is raised at all.
    with self.assertRaises(Exception) as raised:
        cmd.parquet(None)

    self.assertIn('Unsupported mode None', str(raised.exception))
def test_create_query_index_catalog_table(self):
    """Query through the index on a catalog table returns the same rows as a plain query."""
    ctx = QueryContext(self.spark)
    table_name = "test_parquet_table"
    predicate = 'id = 3'

    frame = self.spark.range(0, 10).withColumn('str', lit('abc'))
    frame.write.saveAsTable(table_name)
    try:
        ctx.index.create.indexByAll().table(table_name)
        indexed_rows = ctx.index.table(table_name).filter(predicate).collect()
        plain_rows = self.spark.table(table_name).filter(predicate).collect()
        self.assertEqual(indexed_rows, plain_rows)
    finally:
        # Always remove the catalog table, whether or not the checks passed.
        self.spark.sql("drop table " + table_name)
def test_create_overwrite_index_catalog_table(self):
    """Index on a catalog table can be re-created with mode 'overwrite' and still exists."""
    ctx = QueryContext(self.spark)
    table_name = "test_parquet_table"

    frame = self.spark.range(0, 10).withColumn('str', lit('abc'))
    frame.write.saveAsTable(table_name)
    try:
        # Index all columns first, then overwrite with an index on 'id' only.
        ctx.index.create.indexByAll().table(table_name)
        overwrite_cmd = ctx.index.create.mode('overwrite').indexBy('id')
        overwrite_cmd.table(table_name)
        self.assertTrue(ctx.index.exists.table(table_name))
    finally:
        # Always remove the catalog table, whether or not the checks passed.
        self.spark.sql("drop table " + table_name)
def test_manager_set_options_wrong(self):
    """Passing None to ``options`` is expected to raise AttributeError."""
    ctx = QueryContext(self.spark)
    with self.assertRaises(AttributeError):
        ctx.index.options(None)
def test_manager_set_many_sources(self):
    """When ``format`` is called repeatedly, the last value is the one kept."""
    ctx = QueryContext(self.spark)

    manager = ctx.index
    for source in ('a', 'b', 'c'):
        manager = manager.format(source)

    self.assertEqual(manager._source, 'c')
def test_manager_set_source(self):
    """``format`` records the given source name on the manager."""
    source = 'test-format'
    manager = QueryContext(self.spark).index.format(source)
    self.assertEqual(manager._source, source)
def test_index_wrong_init(self):
    """Constructing a QueryContext from None is expected to raise AttributeError."""
    self.assertRaises(AttributeError, QueryContext, None)
def test_create_command_index_by_all(self):
    """``indexByAll`` is expected to leave the command's column list as None."""
    context = QueryContext(self.spark)
    cmd = context.index.create.indexByAll()
    # Identity check: None is a singleton, so compare with ``is`` rather than ``==``.
    self.assertIsNone(cmd._columns)
def test_create_command_index_by_cols(self):
    """``indexBy`` is expected to store the given column names as a list."""
    columns = ['a', 'b']
    cmd = QueryContext(self.spark).index.create.indexBy(*columns)
    self.assertEqual(cmd._columns, columns)
def test_create_command_mode(self):
    """When ``mode`` is set twice on a create command, the later value wins."""
    ctx = QueryContext(self.spark)

    cmd = ctx.index.create.mode('overwrite')
    cmd = cmd.mode('ignore')

    self.assertEqual(cmd._mode, 'ignore')