# Checks that ddl.CreateTableAvro compiles to the expected
# CREATE EXTERNAL TABLE ... STORED AS AVRO statement.
#
# The statement is built with database='foo' and can_exist=True; the expected
# text correspondingly carries the foo.`new_table` qualifier and the
# IF NOT EXISTS clause, and has `path` substituted into the LOCATION clause.
#
# The Avro schema dict is expected to appear under the 'avro.schema.literal'
# table property with its keys in alphabetical order: field 'd' is declared as
# type/logicalType/precision/scale/name but expected as
# logicalType/name/precision/scale/type.
# NOTE(review): presumably the compiler serialises the schema as sorted,
# indented JSON -- confirm against ddl.CreateTableAvro.
#
# NOTE(review): the comparison is an exact string match, so line breaks and
# indentation inside the triple-quoted expected literal are significant; do
# not re-wrap that literal without checking the compiler's actual output.
def test_create_external_table_avro(): path = '/path/to/files/' avro_schema = { 'fields': [ {'name': 'a', 'type': 'string'}, {'name': 'b', 'type': 'int'}, {'name': 'c', 'type': 'double'}, { "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2, 'name': 'd', }, ], 'name': 'my_record', 'type': 'record', } stmt = ddl.CreateTableAvro( 'new_table', path, avro_schema, database='foo', can_exist=True ) result = stmt.compile() expected = ( """\ CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table` STORED AS AVRO LOCATION '%s' TBLPROPERTIES ( 'avro.schema.literal'='{ "fields": [ { "name": "a", "type": "string" }, { "name": "b", "type": "int" }, { "name": "c", "type": "double" }, { "logicalType": "decimal", "name": "d", "precision": 4, "scale": 2, "type": "bytes" } ], "name": "my_record", "type": "record" }' )""" % path ) assert result == expected
def avro_file(
    self,
    hdfs_dir,
    avro_schema,
    name=None,
    database=None,
    external=True,
    persist=False
):
    """
    Expose a directory of Avro files as a (possibly temporary) table.

    Parameters
    ----------
    hdfs_dir : string
      Absolute HDFS path of the directory holding the Avro files
    avro_schema : dict
      Avro schema describing the data, given as a Python dict
    name : string, default None
      Table name; resolved through ``_get_concrete_table_path``
    database : string, default None
      Database name; resolved through ``_get_concrete_table_path``
    external : boolean, default True
      Forwarded unchanged to ``ddl.CreateTableAvro``
    persist : boolean, default False
      Forwarded to ``_get_concrete_table_path`` and ``_wrap_new_table``

    Returns
    -------
    avro_table : ImpalaTable
    """
    table_name, db_name = self._get_concrete_table_path(
        name, database, persist=persist
    )
    # The qualified name is what gets handed to _wrap_new_table below.
    full_name = self._fully_qualified_name(table_name, db_name)

    # Build the CREATE TABLE statement and run it in one step.
    self._execute(
        ddl.CreateTableAvro(
            table_name,
            hdfs_dir,
            avro_schema,
            database=db_name,
            external=external
        )
    )
    return self._wrap_new_table(full_name, persist)