def create_test_database(self, unique_database):
  """Create the Parquet bloom-filter test table and load its data file.

  The table 'parquet_bloom_filter' is created in unique_database with a
  schema matching testdata/data/parquet-bloom-filtering.parquet, which is
  then copied into the table's location.
  """
  # Adjacent string literals are concatenated by the parser; the pieces
  # below spell out one CREATE TABLE statement (the {db}/{tbl} placeholders
  # are filled in by create_table_and_copy_files).
  ddl = ('create table {db}.{tbl} ( '
         ' int8_col TINYINT, '
         ' int16_col SMALLINT, '
         ' int32_col INT, '
         ' int64_col BIGINT, '
         ' float_col FLOAT, '
         ' double_col DOUBLE, '
         ' string_col STRING, '
         ' char_col VARCHAR(3) '
         ') '
         'stored as parquet ')
  create_table_and_copy_files(
      self.client, ddl, unique_database, 'parquet_bloom_filter',
      ['testdata/data/parquet-bloom-filtering.parquet'])
def test_deprecated_stats(self, vector, unique_database):
  """Test that reading parquet files with statistics with deprecated
  'min'/'max' fields works correctly. The statistics will be used for
  known-good types (boolean, integral, float) and will be ignored for all
  other types (string, decimal, timestamp)."""
  # CTAS (rather than "create table like") turns the partition columns of
  # functional.alltypessmall into ordinary table columns.
  ctas = ('create table {db}.{tbl} stored as parquet '
          'as select * from functional.alltypessmall '
          'limit 0')
  create_table_and_copy_files(self.client, ctas, unique_database,
                              'deprecated_stats',
                              ['testdata/data/deprecated_statistics.parquet'])
  # The test makes assumptions about the number of row groups that are
  # processed and skipped inside a fragment, so force a single fragment.
  exec_options = vector.get_value('exec_option')
  exec_options['num_nodes'] = 1
  self.run_test_case('QueryTest/parquet-deprecated-stats', vector,
                     unique_database)
def test_fileformat_support(self, vector, unique_database):
  """ Test that scanning and writing DATE is supported for text tables only."""
  # Databases and locations are spelled out explicitly in the SQL below, so
  # running for anything other than text fileformat on HDFS adds no coverage.
  if vector.get_value('table_format').file_format != 'text':
    pytest.skip()

  # Parquet-backed table with a single DATE column.
  # NOTE(review): this path has a leading slash while other callers pass a
  # relative 'testdata/...' path — presumably the copy helper accepts both.
  create_parquet = "CREATE TABLE {0}.{1} (date_col DATE) STORED AS PARQUET".format(
      unique_database, "parquet_date_tbl")
  create_table_and_copy_files(self.client, create_parquet, unique_database,
                              "parquet_date_tbl",
                              ["/testdata/data/date_tbl.parquet"])

  # Avro-backed table with a single DATE column.
  create_avro = "CREATE TABLE {0}.{1} (date_col DATE) STORED AS AVRO".format(
      unique_database, "avro_date_tbl")
  create_table_and_copy_files(self.client, create_avro, unique_database,
                              "avro_date_tbl",
                              ["/testdata/data/date_tbl.avro"])

  # Partitioned table whose two partitions point at the data directories of
  # the two tables created above.
  part_tbl = "date_tbl"
  self.client.execute(
      "CREATE TABLE {0}.{1} (date_col DATE) "
      "PARTITIONED BY (date_part DATE)".format(unique_database, part_tbl))
  self.client.execute(
      "ALTER TABLE {0}.{1} ADD "
      "PARTITION (date_part='1899-12-31') "
      "LOCATION '/test-warehouse/{0}.db/parquet_date_tbl' "
      "PARTITION (date_part='1999-12-31') "
      "LOCATION '/test-warehouse/{0}.db/avro_date_tbl' ".format(
          unique_database, part_tbl))
  # Tag one partition as Parquet and the other as Avro so a single scan
  # crosses fileformats.
  self.client.execute(
      "ALTER TABLE {0}.{1} PARTITION (date_part='1899-12-31') "
      "SET FILEFORMAT PARQUET".format(unique_database, part_tbl))
  self.client.execute(
      "ALTER TABLE {0}.{1} PARTITION (date_part='1999-12-31') "
      "SET FILEFORMAT AVRO".format(unique_database, part_tbl))

  # Test scanning/writing tables with different fileformats.
  self.run_test_case('QueryTest/date-fileformat-support', vector,
                     use_db=unique_database)
def _create_test_table_from_file(self, db_name, filename):
  """Create the 'parquet_bloom_filter' table in db_name and load filename
  into it, using the create statement stored on the instance.

  NOTE(review): relies on self.create_stmt being defined elsewhere on the
  class — confirm it is set before this helper is called.
  """
  create_table_and_copy_files(self.client, self.create_stmt, db_name,
                              'parquet_bloom_filter', [filename])