예제 #1
0
def test_create_external_table_avro():
    """Check the DDL text compiled by ``ddl.CreateTableAvro``.

    The statement is built for table ``new_table`` with ``database='foo'``
    and ``can_exist=True``; its compiled output is compared verbatim with
    the expected ``CREATE EXTERNAL TABLE IF NOT EXISTS`` statement, which
    carries the schema under the ``avro.schema.literal`` table property.
    """
    location = '/path/to/files/'

    # Field ``d`` uses the decimal logical type. Key order is kept exactly
    # as written so the input handed to the statement is unchanged.
    decimal_field = {
        "type": "bytes",
        "logicalType": "decimal",
        "precision": 4,
        "scale": 2,
        'name': 'd',
    }
    record_schema = {
        'fields': [
            {'name': 'a', 'type': 'string'},
            {'name': 'b', 'type': 'int'},
            {'name': 'c', 'type': 'double'},
            decimal_field,
        ],
        'name': 'my_record',
        'type': 'record',
    }

    # ``%s`` is the only placeholder in the template; it receives the
    # table location.
    template = """\
CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`
STORED AS AVRO
LOCATION '%s'
TBLPROPERTIES (
  'avro.schema.literal'='{
  "fields": [
    {
      "name": "a",
      "type": "string"
    },
    {
      "name": "b",
      "type": "int"
    },
    {
      "name": "c",
      "type": "double"
    },
    {
      "logicalType": "decimal",
      "name": "d",
      "precision": 4,
      "scale": 2,
      "type": "bytes"
    }
  ],
  "name": "my_record",
  "type": "record"
}'
)"""

    statement = ddl.CreateTableAvro(
        'new_table',
        location,
        record_schema,
        database='foo',
        can_exist=True,
    )
    compiled = statement.compile()

    assert compiled == template % location
예제 #2
0
    def avro_file(
        self,
        hdfs_dir,
        avro_schema,
        name=None,
        database=None,
        external=True,
        persist=False,
    ):
        """Create a table over a directory of Avro files and return it.

        The table name and database are first resolved through
        ``_get_concrete_table_path``; a ``ddl.CreateTableAvro`` statement is
        then built from the resolved values, passed to ``raw_sql``, and the
        new table is wrapped with ``_wrap_new_table``.

        Parameters
        ----------
        hdfs_dir
            Absolute HDFS path to the directory holding the Avro files
        avro_schema
            Avro schema of the data, given as a Python dict
        name
            Table name
        database
            Database name
        external
            Whether the table is external
        persist
            Persist the table

        Returns
        -------
        ImpalaTable
            Impala table expression
        """
        # Keep the resolved values in their own locals rather than rebinding
        # the ``name`` / ``database`` parameters.
        table_name, db_name = self._get_concrete_table_path(
            name, database, persist=persist
        )

        create_stmt = ddl.CreateTableAvro(
            table_name,
            hdfs_dir,
            avro_schema,
            database=db_name,
            external=external,
        )
        self.raw_sql(create_stmt)

        wrapped = self._wrap_new_table(table_name, db_name, persist)
        return wrapped