def add_test_dimensions(cls):
  """Run once per array-resolution policy, restricted to parquet tables."""
  super(TestParquetArrayEncodings, cls).add_test_dimensions()
  policy_dim = ImpalaTestDimension(
      'parquet_array_resolution',
      *TestParquetArrayEncodings.ARRAY_RESOLUTION_POLICIES)
  cls.ImpalaTestMatrix.add_dimension(policy_dim)
  # Array encoding tests are parquet-specific; drop every other file format.
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format == 'parquet')
def add_test_dimensions(cls):
  """Add the batch_size dimension; in 'core', thin out the format matrix."""
  super(TestScannersAllTableFormats, cls).add_test_dimensions()
  # This suite only needs baseline coverage of every file format, so in 'core'
  # the cheaper pairwise combination strategy is good enough.
  if cls.exploration_strategy() == 'core':
    cls.ImpalaTestMatrix.add_dimension(cls.create_table_info_dimension('pairwise'))
  batch_dim = ImpalaTestDimension(
      'batch_size', *TestScannersAllTableFormats.BATCH_SIZES)
  cls.ImpalaTestMatrix.add_dimension(batch_dim)
def add_test_dimensions(cls):
  """Run over every max_scan_range_length on uncompressed text tables only."""
  super(TestTextScanRangeLengths, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('max_scan_range_length', *MAX_SCAN_RANGE_LENGTHS))

  def _is_plain_text(vec):
    # Only uncompressed text vectors are kept for this suite.
    fmt = vec.get_value('table_format')
    return fmt.file_format == 'text' and fmt.compression_codec == 'none'

  cls.ImpalaTestMatrix.add_constraint(_is_plain_text)
def add_test_dimensions(cls):
  """Cover codegen on/off and expr-rewrite on/off on a single-node setup."""
  super(TestLocalTzConversion, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
      cluster_sizes=[0], disable_codegen_options=[False, True], batch_sizes=[0]))
  # Running with and without expr rewrites covers regular expr evaluation as
  # well as constant folding, timestamp literals in particular.
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('enable_expr_rewrites', 0, 1))
def add_test_dimensions(cls):
  """Exercise each column-count variant under every debug action."""
  super(TestWideTable, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_dimension(
      create_exec_option_dimension(debug_action_options=DEBUG_ACTION_DIMS))
  cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension("num_cols", *cls.NUM_COLS))
  if cls.exploration_strategy() != 'exhaustive':
    # Too slow for the default strategies: keep this suite exhaustive-only by
    # rejecting every vector.
    cls.ImpalaTestMatrix.add_constraint(lambda _: False)
def add_test_dimensions(cls):
  """Add batch_size and mt_dop dimensions; restrict joins to parquet."""
  super(TestJoinQueries, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('batch_size', *TestJoinQueries.BATCH_SIZES))
  # Copy before extending: the previous 'mt_dop_values += ...' mutated the
  # shared cls.MT_DOP_VALUES list in place (augmented assignment on an alias),
  # leaking the exhaustive values into every other reader of that attribute.
  mt_dop_values = list(cls.MT_DOP_VALUES)
  if cls.exploration_strategy() == 'exhaustive':
    mt_dop_values += cls.MT_DOP_VALUES_EXHAUSTIVE
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('mt_dop', *mt_dop_values))
  # TODO: Look into splitting up join tests to accomodate hbase.
  # Joins with hbase tables produce drastically different results.
  cls.ImpalaTestMatrix.add_constraint(
      lambda v: v.get_value('table_format').file_format in ['parquet'])
  if cls.exploration_strategy() != 'exhaustive':
    # Cut down on execution time when not running in exhaustive mode.
    cls.ImpalaTestMatrix.add_constraint(lambda v: v.get_value('batch_size') != 1)
def add_test_dimensions(cls):
  """Build the failpoint matrix: query x action x location x mt_dop."""
  super(TestFailpoints, cls).add_test_dimensions()
  for dim_name, dim_values in [('query', QUERIES),
                               ('action', FAILPOINT_ACTIONS),
                               ('location', FAILPOINT_LOCATIONS),
                               ('mt_dop', MT_DOP_VALUES)]:
    cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension(dim_name, *dim_values))
  cls.ImpalaTestMatrix.add_dimension(
      create_exec_option_dimension([0], [False], [0]))
  # Don't create CLOSE:WAIT debug actions to avoid leaking plan fragments
  # (there's no way to cancel a plan fragment once Close() has been called).
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: not (vec.get_value('action') == 'CANCEL'
                       and vec.get_value('location') == 'CLOSE'))
def add_test_dimensions(cls):
  """Matrix of batch_size x fetch_size x wait_for_finished on parquet/none."""
  super(TestResultSpoolingFetchSize, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_dimension(
      create_exec_option_dimension(batch_sizes=cls._batch_sizes))
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('fetch_size', *cls._fetch_sizes))
  # Whether the tests should wait for all results to be spooled before
  # fetching any rows.
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('wait_for_finished', True, False))
  # Result spooling should be independent of file format; pinning
  # table_format=parquet/none avoids a test dimension explosion.
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format == 'parquet'
      and vec.get_value('table_format').compression_codec == 'none')
def add_test_dimensions(cls):
  """Run each mem_limit value against parquet tables only."""
  super(TestTpchPrimitivesMemLimitError, cls).add_test_dimensions()
  mem_limit_dim = ImpalaTestDimension('mem_limit', *cls.MEM_IN_MB)
  cls.ImpalaTestMatrix.add_dimension(mem_limit_dim)
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format in ['parquet'])
def add_test_dimensions(cls):
  """Cover parquet/orc, mt_dop 0 and 2, codegen on/off, each client protocol."""
  super(TestNestedTArraysInSelectList, cls).add_test_dimensions()
  # Only parquet and orc vectors are kept for this suite.
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format in ['parquet', 'orc'])
  cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('mt_dop', 0, 2))
  cls.ImpalaTestMatrix.add_dimension(
      create_exec_option_dimension_from_dict(
          {'disable_codegen': ['False', 'True']}))
  cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
def add_test_dimensions(cls):
  """Add mem_limit as a test dimension; outside exhaustive, parquet only."""
  super(TestQueryMemLimitScaling, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension(
      'mem_limit', *TestQueryMemLimitScaling.MEM_LIMITS))
  if cls.exploration_strategy() != 'exhaustive':
    cls.ImpalaTestMatrix.add_constraint(
        lambda vec: vec.get_value('table_format').file_format in ['parquet'])
def add_test_dimensions(cls):
  """Add test_id; constrain batch_size depending on exploration strategy."""
  super(TestMiniStress, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('test_id', *TEST_IDS))
  if cls.exploration_strategy() == 'exhaustive':
    cls.ImpalaTestMatrix.add_constraint(
        lambda vec: vec.get_value('exec_option')['batch_size'] != 1)
  else:
    cls.ImpalaTestMatrix.add_constraint(
        lambda vec: vec.get_value('exec_option')['batch_size'] == 0)
def add_test_dimensions(cls):
  """Add batch_size (values shared with TestJoinQueries); parquet only."""
  super(TestTPCHJoinQueries, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('batch_size', *TestJoinQueries.BATCH_SIZES))
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format in ['parquet'])
  if cls.exploration_strategy() != 'exhaustive':
    # Skip the tiny batch size outside exhaustive to keep execution time down.
    cls.ImpalaTestMatrix.add_constraint(
        lambda vec: vec.get_value('batch_size') != 1)
def add_test_dimensions(cls):
  """Parquet-only runtime row-filter tests over mt_dop 0 and 4."""
  super(TestRuntimeRowFilters, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format in ['parquet'])
  # Exercise both mt and non-mt code paths. Some tests assume 3 finstances,
  # so they are not expected to work unmodified with higher mt_dop values.
  cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('mt_dop', 0, 4))
  if build_runs_slowly:
    # Slow builds additionally enable the ASYNC_CODEGEN query option.
    add_exec_option_dimension(cls, "async_codegen", 1)
def add_test_dimensions(cls):
  """Exhaustive-only streaming bzip2 scans over each max scan range length."""
  super(TestBzip2Streaming, cls).add_test_dimensions()
  if cls.exploration_strategy() != 'exhaustive':
    pytest.skip("skipping if it's not exhaustive test.")
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('max_scan_range_length', *cls.MAX_SCAN_RANGE_LENGTHS))
  # Only bzip-compressed text vectors are relevant here.
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format == 'text'
      and vec.get_value('table_format').compression_codec == 'bzip')
def add_test_dimensions(cls):
  """Run each debug_action value; pin uncompressed parquet."""
  super(TestResultSpoolingMaxReservation, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('debug_action', *cls.DEBUG_ACTION_VALUES))
  # Result spooling should be independent of file format; pinning
  # table_format=parquet/none avoids a test dimension explosion.
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format == 'parquet'
      and vec.get_value('table_format').compression_codec == 'none')
def create_table_info_dimension(cls, exploration_strategy):
  """Build the 'table_format' test dimension.

  Uses the formats given via --table_formats when present, otherwise loads the
  workload's test vectors. Strips the hbase format when hbase is skipped or
  the target filesystem cannot host it.
  """
  # If the user has specified a specific set of table formats to run against, then
  # use those. Otherwise, load from the workload test vectors.
  if pytest.config.option.table_formats:
    table_formats = list()
    for tf in pytest.config.option.table_formats.split(','):
      dataset = get_dataset_from_workload(cls.get_workload())
      table_formats.append(TableFormatInfo.create_from_string(dataset, tf))
    tf_dimensions = ImpalaTestDimension('table_format', *table_formats)
  else:
    tf_dimensions = load_table_info_dimension(cls.get_workload(), exploration_strategy)
  # If 'skip_hbase' is specified or the filesystem is isilon, s3, abfs or local,
  # we don't need the hbase dimension. 'abfs' added for consistency with the
  # sibling implementation of this helper, which already excludes it.
  if pytest.config.option.skip_hbase or TARGET_FILESYSTEM.lower() \
      in ['s3', 'isilon', 'local', 'abfs', 'adls']:
    for tf_dimension in tf_dimensions:
      if tf_dimension.value.file_format == "hbase":
        tf_dimensions.remove(tf_dimension)
        break
  return tf_dimensions
def add_test_dimensions(cls):
  """Single exec-option vector x parquet codec x target file size."""
  super(TestInsertParquetQueries, cls).add_test_dimensions()
  # Pin the exec_option vector to a single value in case these insert tests
  # ever run in parallel (otherwise two tests could insert into the same table
  # at the same time for the same file format).
  # TODO: if they are parallelized, unique temp tables per test case would
  # resolve the concurrency problems instead.
  cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
      cluster_sizes=[0], disable_codegen_options=[False], batch_sizes=[0],
      sync_ddl=[1]))
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension("compression_codec", *PARQUET_CODECS))
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension("file_size", *PARQUET_FILE_SIZES))
  # Only uncompressed parquet source vectors are kept.
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format == 'parquet')
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').compression_codec == 'none')
def add_test_dimensions(cls):
  """Run with expr rewrites both on and off; text/none only in 'core'."""
  super(TestExprs, cls).add_test_dimensions()
  # With and without rewrites covers regular expr evaluation as well as
  # constant folding, timestamp literals in particular.
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('enable_expr_rewrites', 0, 1))
  if cls.exploration_strategy() == 'core':
    # Limit 'core' to a file format that supports codegen.
    cls.ImpalaTestMatrix.add_constraint(
        lambda vec: vec.get_value('table_format').file_format == 'text'
        and vec.get_value('table_format').compression_codec == 'none')
def create_table_info_dimension(cls, exploration_strategy):
  """Build the 'table_format' test dimension.

  Prefers the formats passed via --table_formats; otherwise loads the
  workload's test vectors. Strips the hbase format when hbase is skipped or
  unsupported on the target filesystem.
  """
  explicit_formats = pytest.config.option.table_formats
  if explicit_formats:
    table_formats = [
        TableFormatInfo.create_from_string(
            get_dataset_from_workload(cls.get_workload()), fmt)
        for fmt in explicit_formats.split(',')]
    tf_dimensions = ImpalaTestDimension('table_format', *table_formats)
  else:
    tf_dimensions = load_table_info_dimension(cls.get_workload(), exploration_strategy)
  # hbase is dropped when explicitly skipped or when the target filesystem
  # (s3, isilon, local, abfs, adls) does not get the hbase dimension.
  skip_hbase = pytest.config.option.skip_hbase or TARGET_FILESYSTEM.lower() in [
      's3', 'isilon', 'local', 'abfs', 'adls']
  if skip_hbase:
    hbase_dim = next(
        (dim for dim in tf_dimensions if dim.value.file_format == "hbase"), None)
    if hbase_dim is not None:
      tf_dimensions.remove(hbase_dim)
  return tf_dimensions
def add_test_dimensions(cls):
  """Single exec-option vector, uncompressed parquet, every parquet codec."""
  super(TestInsertParquetVerifySize, cls).add_test_dimensions()
  # Fix the exec_option vector to have a single value.
  cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
      cluster_sizes=[0], disable_codegen_options=[False], batch_sizes=[0],
      sync_ddl=[1]))
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format == 'parquet')
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').compression_codec == 'none')
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension("compression_codec", *PARQUET_CODECS))
def add_test_dimensions(cls):
  """Add limit_value and query dimensions plus pragmatic pruning constraints."""
  super(TestLimit, cls).add_test_dimensions()
  # A reduced set of limit values keeps 'core' runs short.
  if cls.exploration_strategy() == 'core':
    limit_values = TestLimit.LIMIT_VALUES_CORE
  else:
    limit_values = TestLimit.LIMIT_VALUES
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('limit_value', *limit_values))
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('query', *TestLimit.QUERIES))
  # Don't run with large limits and tiny batch sizes: that generates excessive
  # network traffic and makes the machine run very slowly.
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('limit_value') < 100
      or vec.get_value('exec_option')['batch_size'] == 0)
  # TPCH is not generated in hbase format.
  # TODO: Add test coverage for hbase.
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format != "hbase")
def add_test_dimensions(cls):
  """Run with repeatable on/off; excludes kudu and hbase formats."""
  super(TestTableSample, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('repeatable', True, False))
  # Tablesample is only supported on HDFS tables.
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format != 'kudu'
      and vec.get_value('table_format').file_format != 'hbase')
  if cls.exploration_strategy() != 'exhaustive':
    # Cut down on core testing time by limiting the file formats.
    cls.ImpalaTestMatrix.add_constraint(
        lambda vec: vec.get_value('table_format').file_format == 'parquet'
        or vec.get_value('table_format').file_format == 'text')
def add_test_dimensions(cls):
  """mt_dop 0/1 over all formats except hbase, with a pruned cross product."""
  super(TestRuntimeFilters, cls).add_test_dimensions()
  # Runtime filters are disabled on HBase.
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format not in ['hbase'])
  # Exercise both mt and non-mt code paths. Some tests assume 3 finstances,
  # so they are not expected to work unmodified with higher mt_dop values.
  cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('mt_dop', 0, 1))
  # Don't test all combinations of file format and mt_dop; only a few
  # representative formats get the full product.
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format
      in ['parquet', 'text', 'kudu'] or vec.get_value('mt_dop') == 0)
def add_test_dimensions(cls):
  """Uncompressed text x every client protocol x async-load on/off."""
  super(TestAsyncLoadData, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_dimension(
      create_uncompressed_text_dimension(cls.get_workload()))
  # Cover all clients: hs2, hs2-http and beeswax.
  cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
  # Each client runs in both exec modes.
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('enable_async_load_data_execution', True, False))
  # Keep codegen enabled (disable_codegen = False).
  cls.ImpalaTestMatrix.add_dimension(
      create_exec_option_dimension(disable_codegen_options=[False]))
def add_test_dimensions(cls):
  """Builds the cancellation test matrix (query x query_type x cancel_delay x
  wait/fail-RPC actions x join_before_close x resource limits x mt_dop), then
  prunes combinations that are invalid or too expensive."""
  super(TestCancellation, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('query', *QUERIES.keys()))
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('query_type', *QUERY_TYPE))
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('cancel_delay', *CANCEL_DELAY_IN_SECONDS))
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('wait_action', *WAIT_ACTIONS))
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('fail_rpc_action', *FAIL_RPC_ACTIONS))
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('join_before_close', *JOIN_BEFORE_CLOSE))
  # buffer_pool_limit is pinned to the single value 0.
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('buffer_pool_limit', 0))
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('cpu_limit_s', *CPU_LIMIT_S))
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('mt_dop', *MT_DOP_VALUES))
  # CTAS vectors only run against uncompressed text/parquet/kudu.
  cls.ImpalaTestMatrix.add_constraint(
      lambda v: v.get_value('query_type') != 'CTAS' or (
      v.get_value('table_format').file_format in ['text', 'parquet', 'kudu'] and
      v.get_value('table_format').compression_codec == 'none'))
  # Only the default batch size is exercised.
  cls.ImpalaTestMatrix.add_constraint(
      lambda v: v.get_value('exec_option')['batch_size'] == 0)
  # Ignore 'compute stats' queries for the CTAS query type.
  cls.ImpalaTestMatrix.add_constraint(
      lambda v: not (v.get_value('query_type') == 'CTAS' and v.get_value(
      'query').startswith('compute stats')))
  # Ignore CTAS on Kudu if there is no PRIMARY KEY specified.
  cls.ImpalaTestMatrix.add_constraint(lambda v: not (
      v.get_value('query_type') == 'CTAS' and v.get_value('table_format')
      .file_format == 'kudu' and QUERIES[v.get_value('query')] is None))
  # tpch tables are not generated for hbase as the data loading takes a very
  # long time.
  # TODO: Add cancellation tests for hbase.
  cls.ImpalaTestMatrix.add_constraint(
      lambda v: v.get_value('table_format').file_format != 'hbase')
  if cls.exploration_strategy() != 'core':
    # NOTE(review): this assigns a *local* name that is never read afterwards,
    # so the statement has no effect. Presumably it was meant to rebind a
    # module-level NUM_CANCELATION_ITERATIONS (which would need a 'global'
    # statement) — confirm against the rest of the module.
    NUM_CANCELATION_ITERATIONS = 3
def add_test_dimensions(cls): super(TestFailpoints, cls).add_test_dimensions() # Executing an explain on the the test query will fail in an enviornment where hbase # tables don't exist (s3). Since this happens before the tests are run, the skipif # marker won't catch it. If 's3' is detected as a file system, return immedietely. if os.getenv("TARGET_FILESYSTEM") in ["s3", "isilon", "local"]: return node_id_map = TestFailpoints.parse_plan_nodes_from_explain_output(QUERY, "functional") assert node_id_map cls.ImpalaTestMatrix.add_dimension( ImpalaTestDimension('location', *FAILPOINT_LOCATION)) cls.ImpalaTestMatrix.add_dimension( ImpalaTestDimension('target_node', *(node_id_map.items()))) cls.ImpalaTestMatrix.add_dimension( ImpalaTestDimension('action', *FAILPOINT_ACTION)) cls.ImpalaTestMatrix.add_dimension( ImpalaTestDimension('query_type', *QUERY_TYPE)) cls.ImpalaTestMatrix.add_dimension( create_exec_option_dimension([0], [False], [0])) # These are invalid test cases. # For more info see IMPALA-55 and IMPALA-56. cls.ImpalaTestMatrix.add_constraint(lambda v: not ( v.get_value('action') == 'FAIL' and v.get_value('location') in ['CLOSE'] and v.get_value('target_node')[0] in ['AGGREGATE', 'HASH JOIN']) and not (v.get_value('location') in ['PREPARE'] and v.get_value('action') == 'CANCEL')) # Don't create CLOSE:WAIT debug actions to avoid leaking plan fragments (there's no # way to cancel a plan fragment once Close() has been called) cls.ImpalaTestMatrix.add_constraint( lambda v: not (v.get_value('action') == 'CANCEL' and v.get_value('location') == 'CLOSE')) # No need to test error in scanner preparation for non-scan nodes. cls.ImpalaTestMatrix.add_constraint( lambda v: (v.get_value('location') != 'PREPARE_SCANNER' or v.get_value('target_node')[0] == 'SCAN HDFS'))
def add_test_dimensions(cls):
  """A hook for adding additional dimensions.

  By default loads the table_info and exec_option dimensions; a test that
  needs more (or different) dimensions can override this function.
  """
  super(ImpalaTestSuite, cls).add_test_dimensions()
  table_info_dim = cls.create_table_info_dimension(cls.exploration_strategy())
  cls.ImpalaTestMatrix.add_dimension(table_info_dim)
  cls.ImpalaTestMatrix.add_dimension(cls.__create_exec_option_dimension())
  # Tests execute through Beeswax by default. Individual suites converted to
  # the HS2 client add HS2 in addition to, or instead of, beeswax.
  cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('protocol', 'beeswax'))
def add_test_dimensions(cls):
  """num_queries x round_robin_submission x submission_delay_ms, pruned for
  code-coverage builds and 'core'."""
  super(TestAdmissionControllerStress, cls).add_test_dimensions()
  for dim_name, dim_values in [
      ('num_queries', NUM_QUERIES),
      ('round_robin_submission', ROUND_ROBIN_SUBMISSION),
      ('submission_delay_ms', SUBMISSION_DELAY_MS)]:
    cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension(dim_name, *dim_values))
  # Additional constraints for code coverage jobs and core.
  num_queries = None
  if IMPALAD_BUILD.has_code_coverage():
    # Code coverage builds can't handle the increased concurrency.
    num_queries = 15
  elif cls.exploration_strategy() == 'core':
    num_queries = 30
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('submission_delay_ms') == 0)
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('round_robin_submission') == True)
  if num_queries is not None:
    cls.ImpalaTestMatrix.add_constraint(
        lambda vec: vec.get_value('num_queries') == num_queries)
def add_test_dimensions(cls):
  """partitioned x key_load_dir x key_tbl_dir over uncompressed text."""
  super(TestHdfsEncryption, cls).add_test_dimensions()
  cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
  cls.ImpalaTestMatrix.add_dimension(
      create_uncompressed_text_dimension(cls.get_workload()))
  partitioned_values = [True, False]
  # For 'core', just test loading from a directory that is encrypted.
  key_load_dirs = ["testkey1"]
  key_tbl_dirs = [None]
  if cls.exploration_strategy() == 'exhaustive':
    key_load_dirs = [None, "testkey1", "testkey2"]
    key_tbl_dirs = [None, "testkey1", "testkey2"]
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('partitioned', *partitioned_values))
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('key_load_dir', *key_load_dirs))
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('key_tbl_dir', *key_tbl_dirs))
  # At least one of the two key dirs must be set for a vector to be useful.
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('key_load_dir') is not None
      or vec.get_value('key_tbl_dir') is not None)
def add_test_dimensions(cls):
  """Exhaustive-only DDL stress: one vector per test_id on text/none."""
  super(TestDdlStress, cls).add_test_dimensions()
  if cls.exploration_strategy() != 'exhaustive':
    pytest.skip(
        "Should only run in exhaustive due to long execution time.")
  cls.ImpalaTestMatrix.add_dimension(
      ImpalaTestDimension('test_id', *TEST_IDS))
  # Only the default batch size on uncompressed text vectors is exercised.
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('exec_option')['batch_size'] == 0)
  cls.ImpalaTestMatrix.add_constraint(
      lambda vec: vec.get_value('table_format').file_format == 'text'
      and vec.get_value('table_format').compression_codec == 'none')