Beispiel #1
0
def create_exec_option_dimension_from_dict(exec_option_dimensions):
    """Build a query exec option test dimension.

    Exhaustively enumerates every combination of the given query option
    values. Each combination becomes one exec option dictionary and is added
    as a value of the resulting test dimension. Such a dictionary can then be
    passed via Beeswax to control Impala query execution behavior.

    TODO: In the future we could generate these values using pairwise to
    reduce total execution time.
    """
    # Sort the option names so the cross product is produced in a
    # deterministic order.
    option_names = sorted(exec_option_dimensions)
    value_lists = [exec_option_dimensions[name] for name in option_names]

    # Generate all combinations of the specified exec options and store each
    # one in exec_option dictionary format.
    option_dicts = []
    for combo in product(*value_lists):
        option_dicts.append(dict(zip(option_names, combo)))

    # Build a test vector out of it.
    return ImpalaTestDimension('exec_option', *option_dicts)
Beispiel #2
0
 def add_test_dimensions(cls):
     """Builds the insert-test matrix.

     The exec_option vector is fixed to a single value. This is needed should
     we decide to run the insert tests in parallel (otherwise there would be
     two tests inserting into the same table at the same time for the same
     file format).
     TODO: When we do decide to run these tests in parallel we could create
     unique temp tables for each test case to resolve the concurrency
     problems.
     """
     super(TestInsertQueries, cls).add_test_dimensions()
     if cls.exploration_strategy() == 'core':
         cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
             cluster_sizes=[0], disable_codegen_options=[False],
             batch_sizes=[0], sync_ddl=[0]))
         cls.ImpalaTestMatrix.add_dimension(
             create_uncompressed_text_dimension(cls.get_workload()))
         return

     # Non-core strategies exercise more batch sizes, sync_ddl and codecs.
     cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
         cluster_sizes=[0], disable_codegen_options=[False],
         batch_sizes=[0, 1, 16], sync_ddl=[0, 1]))
     cls.ImpalaTestMatrix.add_dimension(
         ImpalaTestDimension("compression_codec", *PARQUET_CODECS))

     # Insert is currently only supported for text and parquet. For parquet we
     # want to iterate through all the compression codecs.
     # TODO: each column in parquet can have a different codec. We could test
     # all the codecs in one table/file with some additional flags.
     def insert_supported(v):
         file_format = v.get_value('table_format').file_format
         codec = v.get_value('compression_codec')
         return file_format == 'parquet' or (
             file_format == 'text' and codec == 'none')
     cls.ImpalaTestMatrix.add_constraint(insert_supported)
     cls.ImpalaTestMatrix.add_constraint(
         lambda v: v.get_value('table_format').compression_codec == 'none')

     # Only test other batch sizes for uncompressed parquet to keep the
     # execution time within reasonable bounds.
     def batch_size_applies(v):
         if v.get_value('exec_option')['batch_size'] == 0:
             return True
         return (v.get_value('table_format').file_format == 'parquet'
                 and v.get_value('compression_codec') == 'none')
     cls.ImpalaTestMatrix.add_constraint(batch_size_applies)
Beispiel #3
0
 def create_table_info_dimension(cls, exploration_strategy):
     """Builds the 'table_format' test dimension.

     If the user has specified a specific set of table formats to run against
     (the --table_formats pytest option, a comma-separated list), those are
     used. Otherwise the dimension is loaded from the workload test vectors.

     If 'skip_hbase' is specified or the filesystem is isilon, s3 or local,
     the hbase table format is removed from the dimension.
     """
     if pytest.config.option.table_formats:
         # The dataset depends only on the workload, so resolve it once
         # instead of once per listed table format (hoisted out of the loop).
         dataset = get_dataset_from_workload(cls.get_workload())
         table_formats = [
             TableFormatInfo.create_from_string(dataset, tf)
             for tf in pytest.config.option.table_formats.split(',')]
         tf_dimensions = ImpalaTestDimension('table_format', *table_formats)
     else:
         tf_dimensions = load_table_info_dimension(cls.get_workload(),
                                                   exploration_strategy)
     if pytest.config.option.skip_hbase or TARGET_FILESYSTEM.lower() \
         in ['s3', 'isilon', 'local', 'abfs', 'adls']:
         # At most one hbase entry is expected, so stop after the first match.
         for tf_dimension in tf_dimensions:
             if tf_dimension.value.file_format == "hbase":
                 tf_dimensions.remove(tf_dimension)
                 break
     return tf_dimensions
def load_table_info_dimension(workload_name,
                              exploration_strategy,
                              file_formats=None,
                              compression_codecs=None):
    """Loads the test vector for the given workload and exploration strategy.

    The vector file is expected at
    WORKLOAD_DIR/<workload>/<workload>_<exploration_strategy>.csv, with one
    'key1: value1, key2: value2, ...' row per table format; '#' lines are
    comments.

    Args:
      workload_name: name of the workload whose vector file to load.
      exploration_strategy: exploration strategy suffix of the vector file.
      file_formats: optional collection; rows with other file formats are
        skipped.
      compression_codecs: optional collection; rows with other codecs are
        skipped.

    Returns:
      An ImpalaTestDimension named 'table_format' with one TableFormatInfo
      per matching row.

    Raises:
      RuntimeError: if the vector file does not exist.
    """
    test_vector_file = os.path.join(
        WORKLOAD_DIR, workload_name,
        '%s_%s.csv' % (workload_name, exploration_strategy))

    if not os.path.isfile(test_vector_file):
        # Use the call form of raise: the old 'raise E, msg' statement is
        # Python-2-only syntax.
        raise RuntimeError('Vector file not found: ' + test_vector_file)

    vector_values = []

    # Open in text mode so split(':')/split(',') operate on str under
    # Python 3 as well (binary mode would yield bytes and fail there).
    with open(test_vector_file, 'r') as vector_file:
        # Iterate the file lazily instead of materializing via readlines().
        for line in vector_file:
            # Skip blank lines and comments rather than crashing on them.
            if not line.strip() or line.strip().startswith('#'):
                continue

            # Extract each test vector and add them to a dictionary.
            vals = dict((key.strip(), value.strip()) for key, value in
                        (item.split(':') for item in line.split(',')))

            # Skip Kudu if Kudu is not supported (IMPALA-4287).
            if os.environ['KUDU_IS_SUPPORTED'] != 'true' and \
               vals['file_format'] == 'kudu':
                continue

            # If only loading specific file formats skip anything that
            # doesn't match.
            if file_formats is not None and \
               vals['file_format'] not in file_formats:
                continue
            if compression_codecs is not None and \
               vals['compression_codec'] not in compression_codecs:
                continue
            vector_values.append(TableFormatInfo(**vals))

    return ImpalaTestDimension('table_format', *vector_values)
Beispiel #5
0
    def add_test_dimensions(cls):
        """Builds the cancellation test matrix.

        Adds dimensions for the query, query type, cancel delay, wait/RPC
        failure debug actions, join-before-close behavior and a fixed
        buffer_pool_limit, then prunes combinations that do not apply.
        """
        super(TestCancellation, cls).add_test_dimensions()
        cls.ImpalaTestMatrix.add_dimension(
            ImpalaTestDimension('query', *QUERIES.keys()))
        cls.ImpalaTestMatrix.add_dimension(
            ImpalaTestDimension('query_type', *QUERY_TYPE))
        cls.ImpalaTestMatrix.add_dimension(
            ImpalaTestDimension('cancel_delay', *CANCEL_DELAY_IN_SECONDS))
        cls.ImpalaTestMatrix.add_dimension(
            ImpalaTestDimension('wait_action', *WAIT_ACTIONS))
        cls.ImpalaTestMatrix.add_dimension(
            ImpalaTestDimension('fail_rpc_action', *FAIL_RPC_ACTIONS))
        cls.ImpalaTestMatrix.add_dimension(
            ImpalaTestDimension('join_before_close', *JOIN_BEFORE_CLOSE))
        # Single-valued dimension: buffer_pool_limit is always 0.
        cls.ImpalaTestMatrix.add_dimension(
            ImpalaTestDimension('buffer_pool_limit', 0))

        # CTAS is only run against text/parquet/kudu tables without
        # compression; all other query types are unrestricted here.
        cls.ImpalaTestMatrix.add_constraint(
            lambda v: v.get_value('query_type') != 'CTAS' or (\
                v.get_value('table_format').file_format in ['text', 'parquet', 'kudu'] and\
                v.get_value('table_format').compression_codec == 'none'))
        # Only run with the default batch size.
        cls.ImpalaTestMatrix.add_constraint(
            lambda v: v.get_value('exec_option')['batch_size'] == 0)
        # Ignore 'compute stats' queries for the CTAS query type.
        cls.ImpalaTestMatrix.add_constraint(
            lambda v: not (v.get_value('query_type') == 'CTAS' and v.get_value(
                'query').startswith('compute stats')))

        # Ignore CTAS on Kudu if there is no PRIMARY KEY specified.
        # QUERIES maps each query to its CTAS key clause (None = no key).
        cls.ImpalaTestMatrix.add_constraint(lambda v: not (
            v.get_value('query_type') == 'CTAS' and v.get_value('table_format')
            .file_format == 'kudu' and QUERIES[v.get_value('query')] is None))

        # tpch tables are not generated for hbase as the data loading takes a very long time.
        # TODO: Add cancellation tests for hbase.
        cls.ImpalaTestMatrix.add_constraint(lambda v:\
            v.get_value('table_format').file_format != 'hbase')
        if cls.exploration_strategy() != 'core':
            # NOTE(review): this binds a function-local name that is never
            # read again in this method; it was presumably meant to rebind a
            # module-level NUM_CANCELATION_ITERATIONS (missing a 'global'
            # declaration) -- confirm against the full file.
            NUM_CANCELATION_ITERATIONS = 3
Beispiel #6
0
 def add_test_dimensions(cls):
   """Adds the max_scan_range_length dimension on top of the parent ones."""
   super(TestScanRangeLengths, cls).add_test_dimensions()
   scan_range_dim = ImpalaTestDimension(
       'max_scan_range_length', *MAX_SCAN_RANGE_LENGTHS)
   cls.ImpalaTestMatrix.add_dimension(scan_range_dim)
Beispiel #7
0
 def add_test_dimensions(cls):
   """Adds the num_cols dimension; prunes everything outside exhaustive runs."""
   super(TestWideTable, cls).add_test_dimensions()
   cls.ImpalaTestMatrix.add_dimension(
       ImpalaTestDimension("num_cols", *cls.NUM_COLS))
   if cls.exploration_strategy() != 'exhaustive':
     # To cut down on test execution time, only run in exhaustive: reject
     # every test vector otherwise.
     cls.ImpalaTestMatrix.add_constraint(lambda v: False)
Beispiel #8
0
def create_beeswax_dimension():
    """Return a single-valued 'protocol' dimension selecting beeswax."""
    protocols = ('beeswax',)
    return ImpalaTestDimension('protocol', *protocols)
Beispiel #9
0
def create_avro_snappy_dimension(workload):
    """Return a 'table_format' dimension holding only snappy-compressed avro
    for the given workload's dataset."""
    table_format = TableFormatInfo.create_from_string(
        get_dataset_from_workload(workload), 'avro/snap/block')
    return ImpalaTestDimension('table_format', table_format)
Beispiel #10
0
 def add_test_dimensions(cls):
   """Builds the matrix from (table format, file extension) pairs."""
   format_and_extension_pairs = [('parquet', '.parq'), ('textfile', '.txt')]
   cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension(
       'table_format_and_file_extension', *format_and_extension_pairs))
Beispiel #11
0
def create_client_protocol_no_strict_dimension():
  """Return a 'strict_hs2_protocol' dimension fixed to False (non-strict)."""
  strict_mode = False
  return ImpalaTestDimension('strict_hs2_protocol', strict_mode)
 def add_test_dimensions(cls):
     """Adds the mt_dop dimension covering every value in MT_DOP_VALUES."""
     super(TestMtDop, cls).add_test_dimensions()
     mt_dop_dim = ImpalaTestDimension('mt_dop', *MT_DOP_VALUES)
     cls.ImpalaTestMatrix.add_dimension(mt_dop_dim)
def create_parquet_dimension(workload):
    """Return a 'table_format' dimension restricted to uncompressed parquet
    for the given workload's dataset."""
    table_format = TableFormatInfo.create_from_string(
        get_dataset_from_workload(workload), 'parquet/none')
    return ImpalaTestDimension('table_format', table_format)
Beispiel #14
0
 def add_test_dimensions(cls):
     """Fixes mt_dop to 4 and restricts the matrix to parquet tables."""
     super(TestMtDopAdmissionSlots, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('mt_dop', 4))
     def parquet_only(vector):
         return vector.get_value('table_format').file_format == 'parquet'
     cls.ImpalaTestMatrix.add_constraint(parquet_only)
Beispiel #15
0
 def add_test_dimensions(cls):
     """Restricts to parquet/orc tables and adds mt_dop values 0 and 2."""
     super(TestNestedTypes, cls).add_test_dimensions()
     def nested_capable_format(vector):
         return vector.get_value('table_format').file_format in ['parquet', 'orc']
     cls.ImpalaTestMatrix.add_constraint(nested_capable_format)
     cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('mt_dop', 0, 2))
Beispiel #16
0
 def add_test_dimensions(cls):
   """Restricts formats to parquet/orc and adds the ORC schema resolution
   dimension plus its constraint."""
   super(TestMaxNestingDepth, cls).add_test_dimensions()
   def nested_format(vector):
     return vector.get_value('table_format').file_format in ['parquet', 'orc']
   cls.ImpalaTestMatrix.add_constraint(nested_format)
   cls.ImpalaTestMatrix.add_dimension(
       ImpalaTestDimension('orc_schema_resolution', 0, 1))
   cls.ImpalaTestMatrix.add_constraint(orc_schema_resolution_constraint)
Beispiel #17
0
def create_beeswax_hs2_hs2http_dimension():
    """Return a 'protocol' dimension covering beeswax, hs2 and hs2-http."""
    protocols = ('beeswax', 'hs2', 'hs2-http')
    return ImpalaTestDimension('protocol', *protocols)
Beispiel #18
0
 def add_test_dimensions(cls):
     """Adds the modification_type dimension and the file format constraint."""
     super(TestHdfsFileMods, cls).add_test_dimensions()
     modification_dim = ImpalaTestDimension(
         'modification_type', *MODIFICATION_TYPES)
     cls.ImpalaTestMatrix.add_dimension(modification_dim)
     cls.ImpalaTestMatrix.add_constraint(cls.file_format_constraint)
def create_uncompressed_text_dimension(workload):
    """Return a 'table_format' dimension restricted to uncompressed text for
    the given workload's dataset."""
    table_format = TableFormatInfo.create_from_string(
        get_dataset_from_workload(workload), 'text/none')
    return ImpalaTestDimension('table_format', table_format)
 def add_test_dimensions(cls):
     """Adds the hand-written 'test cases' dimension and prunes any vector
     rejected by the class's validity filter."""
     super(TestHashJoinTimer, cls).add_test_dimensions()
     case_dim = ImpalaTestDimension('test cases', *cls.TEST_CASES)
     cls.ImpalaTestMatrix.add_dimension(case_dim)
     cls.ImpalaTestMatrix.add_constraint(
         lambda vector: cls.__is_valid_test_vector(vector))
Beispiel #21
0
 def add_test_dimensions(cls):
     """Adds the query dimension and prunes any vector rejected by the
     class's validity filter."""
     super(TestRowsAvailability, cls).add_test_dimensions()
     query_dim = ImpalaTestDimension('query', *cls.QUERIES)
     cls.ImpalaTestMatrix.add_dimension(query_dim)
     cls.ImpalaTestMatrix.add_constraint(
         lambda vector: cls.__is_valid_test_vector(vector))
Beispiel #22
0
 def add_test_dimensions(cls):
   """Adds the array resolution policy dimension; parquet-only matrix."""
   super(TestParquetArrayEncodings, cls).add_test_dimensions()
   resolution_dim = ImpalaTestDimension(
       'parquet_array_resolution',
       *TestParquetArrayEncodings.ARRAY_RESOLUTION_POLICIES)
   cls.ImpalaTestMatrix.add_dimension(resolution_dim)
   cls.ImpalaTestMatrix.add_constraint(
       lambda vector: vector.get_value('table_format').file_format == 'parquet')
 def add_test_dimensions(cls):
     """Adds the mt_dop dimension and restricts the matrix to parquet."""
     super(TestMtDopParquet, cls).add_test_dimensions()
     mt_dop_dim = ImpalaTestDimension('mt_dop', *MT_DOP_VALUES)
     cls.ImpalaTestMatrix.add_dimension(mt_dop_dim)
     def parquet_only(vector):
         return vector.get_value('table_format').file_format == 'parquet'
     cls.ImpalaTestMatrix.add_constraint(parquet_only)
Beispiel #24
0
def create_orc_dimension(workload):
  """Return a 'table_format' dimension restricted to the default ORC format
  for the given workload's dataset."""
  dataset = get_dataset_from_workload(workload)
  orc_format = TableFormatInfo.create_from_string(dataset, 'orc/def')
  return ImpalaTestDimension('table_format', orc_format)