def test_fetch_first_with_exhausted_cache(self):
    """Regression test for IMPALA-4580. If a result cache is large enough to include all
    results, and the fetch is restarted after all rows have been fetched, the final fetch
    (internally) that returns EOS is not idempotent and can crash."""
    RESULT_SET_SIZE = 100
    req = TCLIService.TExecuteStatementReq()
    req.sessionHandle = self.session_handle
    # Cache size equals the LIMIT, so the whole result set fits in the cache.
    req.confOverlay = dict()
    req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = str(RESULT_SET_SIZE)
    req.statement = \
        "SELECT * FROM functional.alltypes ORDER BY id LIMIT %s" % RESULT_SET_SIZE
    resp = self.hs2_client.ExecuteStatement(req)
    HS2TestSuite.check_response(resp)

    # Over-fetch past the end of the result set so the coordinator hits the EOS
    # condition.
    self.fetch_until(resp.operationHandle,
                     TCLIService.TFetchOrientation.FETCH_NEXT,
                     RESULT_SET_SIZE + 1, RESULT_SET_SIZE)

    # Restart from the beginning and over-fetch again, so the cache is exhausted
    # and the coordinator is checked for more rows (the EOS fetch is re-issued).
    self.fetch_until(resp.operationHandle,
                     TCLIService.TFetchOrientation.FETCH_FIRST,
                     RESULT_SET_SIZE + 1, RESULT_SET_SIZE)
    self.close(resp.operationHandle)
# Esempio n. 2
  def __test_invalid_result_caching(self, sql_stmt):
    """ Tests that invalid requests for query-result caching fail
    using the given sql_stmt."""
    impalad = ImpalaCluster().impalads[0].service

    req = TCLIService.TExecuteStatementReq()
    req.sessionHandle = self.session_handle
    req.statement = sql_stmt
    req.confOverlay = dict()

    # Each bad cache-size value must be rejected with the matching error message:
    # first a malformed number, then a size exceeding the per-Impalad maximum
    # (the default maximum result-cache size is 100000).
    bad_cases = [
        ("bad_number",
         "Invalid value 'bad_number' for 'impala.resultset.cache.size' option"),
        ("100001",
         "Requested result-cache size of 100001 exceeds Impala's maximum of 100000"),
    ]
    for bad_value, expected_error in bad_cases:
      req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = bad_value
      resp = self.hs2_client.ExecuteStatement(req)
      HS2TestSuite.check_response(resp, TCLIService.TStatusCode.ERROR_STATUS,
                                  expected_error)
      # The rejected request must leave no cached rows and no in-flight query.
      self.__verify_num_cached_rows(0)
      assert 0 == impalad.get_num_in_flight_queries()
# Esempio n. 3
  def test_select_null(self):
    """Regression test for IMPALA-1370, where NULL literals would appear as strings where
    they should be booleans"""
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "select null"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)

    # Check that the expected type is boolean (for compatibility with Hive, see also
    # IMPALA-914)
    get_result_metadata_req = TCLIService.TGetResultSetMetadataReq()
    get_result_metadata_req.operationHandle = execute_statement_resp.operationHandle
    get_result_metadata_resp = \
        self.hs2_client.GetResultSetMetadata(get_result_metadata_req)
    col = get_result_metadata_resp.schema.columns[0]
    assert col.typeDesc.types[0].primitiveEntry.type == TTypeId.BOOLEAN_TYPE

    # Check that the actual type is boolean: the value must arrive in the boolVal
    # field of the columnar result, not as a string.
    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = execute_statement_resp.operationHandle
    fetch_results_req.maxRows = 1
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp)
    assert fetch_results_resp.results.columns[0].boolVal is not None

    # Exactly one row, rendered as "NULL".
    assert self.column_results_to_string(
      fetch_results_resp.results.columns) == (1, "NULL\n")
    def __test_invalid_result_caching(self, sql_stmt):
        """ Tests that invalid requests for query-result caching fail
    using the given sql_stmt."""
        impala_cluster = ImpalaCluster.get_e2e_test_cluster()
        impalad = impala_cluster.impalads[0].service

        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = sql_stmt
        execute_statement_req.confOverlay = dict()

        # Test that a malformed result-cache size returns an error.
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = "bad_number"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(
            execute_statement_resp, TCLIService.TStatusCode.ERROR_STATUS,
            "Invalid value 'bad_number' for 'impala.resultset.cache.size' option"
        )
        # The rejected request must leave no cached rows and no in-flight query.
        self.__verify_num_cached_rows(0)
        assert 0 == impalad.get_num_in_flight_queries()

        # Test that a result-cache size exceeding the per-Impalad maximum returns an error.
        # The default maximum result-cache size is 100000.
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = "100001"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(
            execute_statement_resp, TCLIService.TStatusCode.ERROR_STATUS,
            "Requested result-cache size of 100001 exceeds Impala's maximum of 100000"
        )
        # Again, no state may leak from the rejected request.
        self.__verify_num_cached_rows(0)
        assert 0 == impalad.get_num_in_flight_queries()
    def test_fetch_first_with_exhausted_cache(self):
        """Regression test for IMPALA-4580. If a result cache is large enough to include all
    results, and the fetch is restarted after all rows have been fetched, the final fetch
    (internally) that returns EOS is not idempotent and can crash."""
        RESULT_SET_SIZE = 100
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        # The cache size equals the LIMIT, so the entire result set fits in the cache.
        execute_statement_req.confOverlay = dict()
        execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] =\
          str(RESULT_SET_SIZE)
        execute_statement_req.statement =\
          "SELECT * FROM functional.alltypes ORDER BY id LIMIT %s" % RESULT_SET_SIZE
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(execute_statement_resp)

        # First fetch more than the entire result set, ensuring that coordinator has hit EOS
        # condition.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT,
                         RESULT_SET_SIZE + 1, RESULT_SET_SIZE)

        # Now restart the fetch, again trying to fetch more than the full result set size so
        # that the cache is exhausted and the coordinator is checked for more rows.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_FIRST,
                         RESULT_SET_SIZE + 1, RESULT_SET_SIZE)
        self.close(execute_statement_resp.operationHandle)
# Esempio n. 6
  def test_select_null(self):
    """Regression test for IMPALA-1370, where NULL literals would appear as strings where
    they should be booleans"""
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "select null"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)

    # Check that the expected type is boolean (for compatibility with Hive, see also
    # IMPALA-914)
    get_result_metadata_req = TCLIService.TGetResultSetMetadataReq()
    get_result_metadata_req.operationHandle = execute_statement_resp.operationHandle
    get_result_metadata_resp = \
        self.hs2_client.GetResultSetMetadata(get_result_metadata_req)
    col = get_result_metadata_resp.schema.columns[0]
    assert col.typeDesc.types[0].primitiveEntry.type == TTypeId.BOOLEAN_TYPE

    # Check that the actual type is boolean
    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = execute_statement_resp.operationHandle
    fetch_results_req.maxRows = 1
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp)
    # The NULL must be delivered in the boolVal column, not as a string.
    assert fetch_results_resp.results.columns[0].boolVal is not None

    # Exactly one row, rendered as "NULL".
    assert self.column_results_to_string(
      fetch_results_resp.results.columns) == (1, "NULL\n")
# Esempio n. 7
  def test_alltypes_v1(self):
    """Fetches one row each from functional.alltypessmall and functional.decimal_tbl
    over the row-oriented (V1) fetch path, and verifies the decimal result schema
    reports the expected precision and scale."""
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle

    execute_statement_req.statement =\
        "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 1"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)
    results = self.fetch(execute_statement_resp.operationHandle,
                TCLIService.TFetchOrientation.FETCH_NEXT, 1, 1)
    assert len(results.results.rows) == 1
    self.close(execute_statement_resp.operationHandle)

    execute_statement_req.statement =\
        "SELECT d1,d5 FROM functional.decimal_tbl ORDER BY d1 LIMIT 1"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)
    results = self.fetch(execute_statement_resp.operationHandle,
                TCLIService.TFetchOrientation.FETCH_NEXT, 1, 1)
    assert len(results.results.rows) == 1

    # Verify the result schema is what we expect. The result has 2 columns, the
    # first is decimal(9,0) and the second is decimal(10,5)
    metadata_resp = self.result_metadata(execute_statement_resp.operationHandle)
    column_types = metadata_resp.schema.columns
    assert len(column_types) == 2
    self.__verify_result_precision_scale(column_types[0], 9, 0)
    self.__verify_result_precision_scale(column_types[1], 10, 5)

    self.close(execute_statement_resp.operationHandle)
# Esempio n. 8
 def __get_runtime_profile(self, op_handle):
     """Return the runtime profile for op_handle, failing the test on RPC error."""
     profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
     profile_req.operationHandle = op_handle
     profile_req.sessionHandle = self.session_handle
     profile_resp = self.hs2_client.GetRuntimeProfile(profile_req)
     HS2TestSuite.check_response(profile_resp)
     return profile_resp.profile
# Esempio n. 9
def ScopedSession(hs2_client, *args, **kwargs):
    """Generator-based context manager: opens an HS2 session on hs2_client
    (forwarding *args/**kwargs to TOpenSessionReq), yields the TOpenSessionResp,
    and closes the session on exit if it was opened successfully.

    Fixes two defects in the original implementation:
      - if OpenSession() itself raised, the 'finally' block read the unbound
        local 'session', masking the real error with a NameError;
      - 'return' inside 'finally' silently swallowed any exception propagating
        through the 'yield' (i.e. raised in the caller's 'with' body).
    """
    session = None
    try:
        open_session_req = TCLIService.TOpenSessionReq(*args, **kwargs)
        session = hs2_client.OpenSession(open_session_req)
        yield session
    finally:
        # Only sessions that were actually opened successfully need closing.
        if session is not None and \
                session.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS:
            close_session_req = TCLIService.TCloseSessionReq()
            close_session_req.sessionHandle = session.sessionHandle
            HS2TestSuite.check_response(hs2_client.CloseSession(close_session_req))
  def __check_hs2_query_opts(self, pool_name, mem_limit=None, expected_options=None):
    """ Submits a query via HS2 (optionally with a mem_limit in the confOverlay)
        into pool_name and checks that the expected_query_options are set in the
        profile."""
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.confOverlay = {'request_pool': pool_name}
    # Only set mem_limit when requested, so the pool's default applies otherwise.
    if mem_limit is not None:
      execute_statement_req.confOverlay['mem_limit'] = mem_limit
    execute_statement_req.statement = "select 1"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)

    # Fetch the single row and close the operation before inspecting the profile.
    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = execute_statement_resp.operationHandle
    fetch_results_req.maxRows = 1
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp)

    close_operation_req = TCLIService.TCloseOperationReq()
    close_operation_req.operationHandle = execute_statement_resp.operationHandle
    HS2TestSuite.check_response(self.hs2_client.CloseOperation(close_operation_req))

    # The profile is still retrievable after CloseOperation; verify the effective
    # query options recorded in it.
    get_profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
    get_profile_req.operationHandle = execute_statement_resp.operationHandle
    get_profile_req.sessionHandle = self.session_handle
    get_profile_resp = self.hs2_client.GetRuntimeProfile(get_profile_req)
    HS2TestSuite.check_response(get_profile_resp)
    self.__check_query_options(get_profile_resp.profile, expected_options)
# Esempio n. 11
    def __check_hs2_query_opts(self,
                               pool_name,
                               mem_limit=None,
                               expected_options=None):
        """ Submits a query via HS2 (optionally with a mem_limit in the confOverlay)
        into pool_name and checks that the expected_query_options are set in the
        profile."""
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.confOverlay = {'request_pool': pool_name}
        # mem_limit is only set when requested, so the pool default applies otherwise.
        if mem_limit is not None:
            execute_statement_req.confOverlay['mem_limit'] = mem_limit
        execute_statement_req.statement = "select 1"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(execute_statement_resp)

        # Fetch the single row and close the operation before inspecting the profile.
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = execute_statement_resp.operationHandle
        fetch_results_req.maxRows = 1
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)

        close_operation_req = TCLIService.TCloseOperationReq()
        close_operation_req.operationHandle = execute_statement_resp.operationHandle
        HS2TestSuite.check_response(
            self.hs2_client.CloseOperation(close_operation_req))

        # The profile is still retrievable after CloseOperation; check the effective
        # query options recorded in it.
        get_profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
        get_profile_req.operationHandle = execute_statement_resp.operationHandle
        get_profile_req.sessionHandle = self.session_handle
        get_profile_resp = self.hs2_client.GetRuntimeProfile(get_profile_req)
        HS2TestSuite.check_response(get_profile_resp)
        self.__check_query_options(get_profile_resp.profile, expected_options)
# Esempio n. 12
  def __query_and_fetch(self, query):
    """Executes 'query' over HS2 and returns the response of a single FetchResults
    call requesting up to 1024 rows."""
    exec_req = TCLIService.TExecuteStatementReq()
    exec_req.sessionHandle = self.session_handle
    exec_req.statement = query
    exec_resp = self.hs2_client.ExecuteStatement(exec_req)
    HS2TestSuite.check_response(exec_resp)

    fetch_req = TCLIService.TFetchResultsReq()
    fetch_req.operationHandle = exec_resp.operationHandle
    fetch_req.maxRows = 1024
    fetch_resp = self.hs2_client.FetchResults(fetch_req)
    HS2TestSuite.check_response(fetch_resp)

    return fetch_resp
# Esempio n. 13
  def __query_and_fetch(self, query):
    """Executes 'query' over HS2 and returns the response of a single FetchResults
    call requesting up to 1024 rows."""
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = query
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)

    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = execute_statement_resp.operationHandle
    fetch_results_req.maxRows = 1024
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp)

    return fetch_results_resp
# Esempio n. 14
  def __test_fetch_no_timeout(self):
    """Tests setting FETCH_ROWS_TIMEOUT_MS to 0, and validates that fetch requests wait
    indefinitely when the timeout is 0."""
    num_rows = 10
    statement = "select * from functional.alltypes limit {0}".format(num_rows)
    # The debug action delays coordinator startup by 5s; with a timeout of 0 the
    # fetch must wait that out rather than return early.
    execute_statement_resp = self.execute_statement(statement,
        conf_overlay={'debug_action': 'CRS_BEFORE_COORD_STARTS:SLEEP@5000',
                      'fetch_rows_timeout_ms': '0'})
    HS2TestSuite.check_response(execute_statement_resp)

    # The first fetch blocks until rows are ready and returns the full result set
    # (it would return 0 rows only if the timeout incorrectly fired).
    fetch_results_resp = self.hs2_client.FetchResults(TCLIService.TFetchResultsReq(
        operationHandle=execute_statement_resp.operationHandle, maxRows=1024))
    HS2TestSuite.check_response(fetch_results_resp)
    assert self.get_num_rows(fetch_results_resp.results) == num_rows
# Esempio n. 15
  def check_user_and_effective_user(self, proxy_user):
    """Runs "SELECT effective_user(), user()" over HS2 and asserts the single
    result row is (proxy_user, USER_NAME)."""
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.confOverlay = dict()
    execute_statement_req.statement = "SELECT effective_user(), user()"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)

    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = execute_statement_resp.operationHandle
    fetch_results_req.maxRows = 1
    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
    HS2TestSuite.check_response(fetch_results_resp)
    # effective_user() reports the delegated/proxy user; user() the connected user.
    assert self.column_results_to_string(
      fetch_results_resp.results.columns) == (1, "%s, %s\n" % (proxy_user, USER_NAME))
# Esempio n. 16
 def test_parallel_insert(self):
   """Tests parallel inserts with result set caching on.
   Parallel inserts have a coordinator instance but no coordinator
   fragment, so the query mem tracker is initialized differently.
   (IMPALA-963)
   """
   self.client.set_configuration({'sync_ddl': 1})
   self.client.execute("create table %s.orderclone like tpch.orders" % self.TEST_DB)
   execute_statement_req = TCLIService.TExecuteStatementReq()
   execute_statement_req.sessionHandle = self.session_handle
   execute_statement_req.confOverlay = dict()
   execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "10"
   # NOTE(review): o_orderkey < 0 presumably matches no rows — the point is
   # exercising the insert machinery with caching enabled, not moving data.
   execute_statement_req.statement = ("insert overwrite %s.orderclone "
                                     "select * from tpch.orders "
                                     "where o_orderkey < 0" % self.TEST_DB)
   execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
   HS2TestSuite.check_response(execute_statement_resp)
# Esempio n. 17
    def check_user_and_effective_user(self, proxy_user):
        """Runs "SELECT effective_user(), user()" over HS2 and asserts the single
        result row is (proxy_user, USER_NAME)."""
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.confOverlay = dict()
        execute_statement_req.statement = "SELECT effective_user(), user()"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(execute_statement_resp)

        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = execute_statement_resp.operationHandle
        fetch_results_req.maxRows = 1
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        # effective_user() reports the proxy user; user() the connected user.
        assert self.column_results_to_string(
            fetch_results_resp.results.columns) == (1, "%s, %s\n" %
                                                    (proxy_user, USER_NAME))
# Esempio n. 18
 def test_parallel_insert(self):
   """Tests parallel inserts with result set caching on.
   Parallel inserts have a coordinator instance but no coordinator
   fragment, so the query mem tracker is initialized differently.
   (IMPALA-963)
   """
   self.client.set_configuration({'sync_ddl': 1})
   self.client.execute("create table %s.orderclone like tpch.orders" % self.TEST_DB)
   execute_statement_req = TCLIService.TExecuteStatementReq()
   execute_statement_req.sessionHandle = self.session_handle
   execute_statement_req.confOverlay = dict()
   execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "10"
   # NOTE(review): o_orderkey < 0 presumably matches no rows — the test exercises
   # the insert path with caching enabled rather than actually copying data.
   execute_statement_req.statement = ("insert overwrite %s.orderclone "
                                     "select * from tpch.orders "
                                     "where o_orderkey < 0" % self.TEST_DB)
   execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
   HS2TestSuite.check_response(execute_statement_resp)
# Esempio n. 19
  def __test_fetch_finished_timeout(self):
    """Tests the query option FETCH_ROWS_TIMEOUT_MS applies to both the time spent
    waiting for a query to finish and the time spent waiting for RowBatches to be sent,
    and that the timeout is not reset for in-progress fetch requests when queries
    transition to the 'finished' state."""
    num_rows = 20
    statement = "select sleep(500) from functional.alltypes limit {0}".format(num_rows)
    # Coordinator startup is delayed 5s; rows arrive in batches of 10, and the
    # fetch timeout (7.5s) spans the startup delay plus roughly one batch.
    execute_statement_resp = self.execute_statement(statement,
            conf_overlay={'debug_action': 'CRS_BEFORE_COORD_STARTS:SLEEP@5000',
                'batch_size': '10', 'fetch_rows_timeout_ms': '7500'})
    HS2TestSuite.check_response(execute_statement_resp)

    # The first fetch waits through the coordinator delay and then times out after
    # receiving only the first batch of 10 rows (not the full 20).
    fetch_results_resp = self.hs2_client.FetchResults(TCLIService.TFetchResultsReq(
        operationHandle=execute_statement_resp.operationHandle, maxRows=1024))
    HS2TestSuite.check_response(fetch_results_resp)
    assert fetch_results_resp.hasMoreRows
    assert HS2TestSuite.get_num_rows(fetch_results_resp.results) == 10

    # Wait for rows to be available for fetch.
    get_operation_status_resp = self.wait_for_operation_state(
        execute_statement_resp.operationHandle,
        TCLIService.TOperationState.FINISHED_STATE)
    HS2TestSuite.check_response(get_operation_status_resp)

    # Assert that all remaining rows can be fetched.
    FetchTimeoutUtils.fetch_num_rows(self.hs2_client,
        execute_statement_resp.operationHandle, num_rows - 10, statement)
# Esempio n. 20
  def test_fetch_multiple_batches_timeout(self):
    """Validate that FETCH_ROWS_TIMEOUT_MS applies when reading multiple RowBatches.
    This test runs a query that produces multiple RowBatches with a fixed delay, and
    asserts that a fetch request to read all rows only reads a subset of the rows (since
    the timeout should ensure that a single fetch request cannot read all RowBatches)."""
    num_rows = 500
    statement = "select id from functional.alltypes limit {0}".format(num_rows)
    # batch_size=10 forces many small RowBatches; the debug action delays GETNEXT so
    # each batch takes measurable time relative to the 500ms fetch timeout.
    execute_statement_resp = self.execute_statement(statement,
        conf_overlay={'batch_size': '10',
                      'debug_action': '0:GETNEXT:DELAY',
                      'fetch_rows_timeout_ms': '500'})
    HS2TestSuite.check_response(execute_statement_resp)

    # Wait for rows to be available for fetch.
    get_operation_status_resp = self.wait_for_operation_state(
        execute_statement_resp.operationHandle,
        TCLIService.TOperationState.FINISHED_STATE)
    HS2TestSuite.check_response(get_operation_status_resp)

    # Issue a fetch request to read all rows, and validate that only a subset of the rows
    # are returned.
    fetch_results_resp = self.hs2_client.FetchResults(TCLIService.TFetchResultsReq(
        operationHandle=execute_statement_resp.operationHandle, maxRows=num_rows))
    HS2TestSuite.check_response(fetch_results_resp)
    num_rows_fetched = HS2TestSuite.get_num_rows(fetch_results_resp.results)
    assert num_rows_fetched > 0 and num_rows_fetched < num_rows
    assert fetch_results_resp.hasMoreRows

    # Assert that all remaining rows can be fetched.
    FetchTimeoutUtils.fetch_num_rows(self.hs2_client,
        execute_statement_resp.operationHandle, num_rows - num_rows_fetched, statement)
# Esempio n. 21
  def __test_fetch_timeout(self):
    """Tests FETCH_ROWS_TIMEOUT_MS by running a query that produces RowBatches with a
    large delay. The test waits for the query to 'finish' and then fetches the first
    RowBatch, which should always be available since a query is only considered
    'finished' if rows are available. Subsequent fetches should time out because
    RowBatch production has been delayed."""
    # Construct a query where there is a large delay between RowBatch production.
    # Grouping on bool_col yields two result rows, produced ~5s apart because of
    # the sleep() in the HAVING clause; batch_size=1 makes each row its own batch.
    num_rows = 2
    statement = "select bool_col, avg(id) from functional.alltypes group by bool_col " \
                "having avg(id) != sleep(5000)"
    execute_statement_resp = self.execute_statement(statement,
        conf_overlay={'fetch_rows_timeout_ms': '1', 'batch_size': '1', 'num_nodes': '1'})
    HS2TestSuite.check_response(execute_statement_resp)

    # Wait for rows to be available for fetch.
    get_operation_status_resp = self.wait_for_operation_state(
        execute_statement_resp.operationHandle,
        TCLIService.TOperationState.FINISHED_STATE, timeout=30)
    HS2TestSuite.check_response(get_operation_status_resp)

    # Assert that exactly 1 row can be fetched.
    FetchTimeoutUtils.fetch_num_rows(self.hs2_client,
        execute_statement_resp.operationHandle, 1, statement)

    # Assert that the next fetch request times out while waiting for a RowBatch to be
    # produced.
    fetch_results_resp = self.hs2_client.FetchResults(
        TCLIService.TFetchResultsReq(
            operationHandle=execute_statement_resp.operationHandle, maxRows=num_rows))
    HS2TestSuite.check_response(fetch_results_resp)
    num_rows_fetched = HS2TestSuite.get_num_rows(fetch_results_resp.results)
    assert num_rows_fetched == 0
    assert fetch_results_resp.hasMoreRows
    # The delayed second row is still fetchable afterwards.
    FetchTimeoutUtils.fetch_num_rows(self.hs2_client,
        execute_statement_resp.operationHandle, 1, statement)
# Esempio n. 22
  def __test_fetch_materialization_timeout(self):
    """Test the query option FETCH_ROWS_TIMEOUT_MS applies to the time taken to
    materialize rows. Runs a query with a sleep() which is evaluated during
    materialization and validates the timeout is applied appropriately."""
    num_rows = 2
    # Each row takes ~5s to materialize (sleep(5000)); the 2.5s timeout therefore
    # falls between the first and second row.
    statement = "select sleep(5000) from functional.alltypes limit {0}".format(num_rows)
    execute_statement_resp = self.execute_statement(statement,
        conf_overlay={'batch_size': '1', 'fetch_rows_timeout_ms': '2500'})
    HS2TestSuite.check_response(execute_statement_resp)

    # Wait for rows to be available for fetch.
    get_operation_status_resp = self.wait_for_operation_state(
        execute_statement_resp.operationHandle,
        TCLIService.TOperationState.FINISHED_STATE)
    HS2TestSuite.check_response(get_operation_status_resp)

    # Only one row should be returned because the timeout should be hit after
    # materializing the first row, but before materializing the second one.
    fetch_results_resp = self.hs2_client.FetchResults(
        TCLIService.TFetchResultsReq(
            operationHandle=execute_statement_resp.operationHandle, maxRows=2))
    HS2TestSuite.check_response(fetch_results_resp)
    assert HS2TestSuite.get_num_rows(fetch_results_resp.results) == 1

    # Assert that all remaining rows can be fetched.
    FetchTimeoutUtils.fetch_num_rows(self.hs2_client,
        execute_statement_resp.operationHandle, num_rows - 1, statement)
# Esempio n. 23
  def test_execute_select_v1(self):
    """Test that a simple select statement works in the row-oriented protocol"""
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)

    fetch_results_req = TCLIService.TFetchResultsReq()
    fetch_results_req.operationHandle = execute_statement_resp.operationHandle
    fetch_results_req.maxRows = 100
    # NOTE(review): self.fetch is called here with a request object, unlike other
    # call sites that pass (handle, orientation, ...) — confirm the helper accepts
    # both forms.
    fetch_results_resp = self.fetch(fetch_results_req)

    # COUNT(*) yields a single row starting at offset 0.
    assert len(fetch_results_resp.results.rows) == 1
    assert fetch_results_resp.results.startRowOffset == 0

    # hasMoreRows is known to be wrongly True here; tracked as IMPALA-558.
    try:
      assert not fetch_results_resp.hasMoreRows
    except AssertionError:
      pytest.xfail("IMPALA-558")
# Esempio n. 24
    def test_disconnected_session_timeout(self):
        """Test that a session gets closed if it has no active connections for more than
    disconnected_session_timeout."""
        conn = HS2TestSuite()
        conn.setup()
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_resp = conn.hs2_client.OpenSession(open_session_req)
        HS2TestSuite.check_response(open_session_resp)
        conn.session_handle = open_session_resp.sessionHandle
        # Run a query, which should succeed.
        conn.execute_statement("select 1")

        # Set up another connection and run a long-running query with the same session.
        conn2 = HS2TestSuite()
        conn2.setup()
        conn2.session_handle = open_session_resp.sessionHandle
        execute_resp = conn2.execute_statement("select sleep(10000)")

        # Close one connection and wait for longer than disconnected_session_timeout. The
        # session should still be available since there's still one active connection.
        conn2.teardown()
        sleep(5)
        conn.execute_statement("select 3")

        # Close the other connection and sleep again. The session should now be closed.
        conn.teardown()
        sleep(5)
        conn.setup()

        # Run another query, which should fail since the session is closed.
        conn.execute_statement(
            "select 2",
            expected_error_prefix="Invalid session id",
            expected_status_code=TCLIService.TStatusCode.ERROR_STATUS)

        # Check that the query was cancelled correctly.
        query_id = operation_id_to_query_id(
            execute_resp.operationHandle.operationId)
        status = self.cluster.get_first_impalad().service.get_query_status(
            query_id)
        assert status == "Session closed because it has no active connections"
# Esempio n. 25
  def test_alltypes_v1(self):
    """Fetches one row each from functional.alltypessmall and functional.decimal_tbl
    over the row-oriented (V1) fetch path, closing each operation afterwards."""
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle

    execute_statement_req.statement =\
        "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 1"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)
    results = self.fetch(execute_statement_resp.operationHandle,
                TCLIService.TFetchOrientation.FETCH_NEXT, 1, 1)
    assert len(results.results.rows) == 1
    self.close(execute_statement_resp.operationHandle)

    # The request object is reused for the second statement.
    execute_statement_req.statement =\
        "SELECT * FROM functional.decimal_tbl ORDER BY d1 LIMIT 1"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)
    results = self.fetch(execute_statement_resp.operationHandle,
                TCLIService.TFetchOrientation.FETCH_NEXT, 1, 1)
    assert len(results.results.rows) == 1
    self.close(execute_statement_resp.operationHandle)
# Esempio n. 26
# 0
  def test_query_stmts(self):
    """Executes a single-row query against alltypessmall and one against
    decimal_tbl, checking that each returns exactly one row."""
    req = TCLIService.TExecuteStatementReq()
    req.sessionHandle = self.session_handle

    req.statement = \
        "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 1"
    resp = self.hs2_client.ExecuteStatement(req)
    HS2TestSuite.check_response(resp)
    fetched = self.fetch(resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 1, 1)
    assert len(fetched.results.rows) == 1
    self.close(resp.operationHandle)

    # Same check against a DECIMAL-typed table, reusing the request object.
    req.statement = \
        "SELECT * FROM functional.decimal_tbl ORDER BY d1 LIMIT 1"
    resp = self.hs2_client.ExecuteStatement(req)
    HS2TestSuite.check_response(resp)
    fetched = self.fetch(resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 1, 1)
    assert len(fetched.results.rows) == 1
    self.close(resp.operationHandle)
# Esempio n. 27
# 0
  def test_execute_select_v1(self):
    """Test that a simple select statement works in the row-oriented protocol"""
    exec_req = TCLIService.TExecuteStatementReq()
    exec_req.sessionHandle = self.session_handle
    exec_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
    exec_resp = self.hs2_client.ExecuteStatement(exec_req)
    HS2TestSuite.check_response(exec_resp)

    fetch_req = TCLIService.TFetchResultsReq(
        operationHandle=exec_resp.operationHandle, maxRows=100)
    fetch_resp = self.hs2_client.FetchResults(fetch_req)
    HS2TestSuite.check_response(fetch_resp)

    # A COUNT(*) query produces exactly one row, starting at offset 0.
    assert len(fetch_resp.results.rows) == 1
    assert fetch_resp.results.startRowOffset == 0

    # hasMoreRows should be false once the single row has been returned, but
    # this is a known defect tracked by IMPALA-558; xfail rather than fail.
    try:
      assert not fetch_resp.hasMoreRows
    except AssertionError:
      pytest.xfail("IMPALA-558")
# Esempio n. 28
# 0
  def __test_fetch_before_finished_timeout(self):
    """Checks that FETCH_ROWS_TIMEOUT_MS applies to fetch requests issued before the
    query has 'finished'. A DELAY debug action holds the query before the Coordinator
    starts; a fetch issued during that window should wait out the timeout and come
    back with zero rows, after which all rows become fetchable."""
    num_rows = 10
    statement = "select * from functional.alltypes limit {0}".format(num_rows)
    overlay = {'debug_action': 'CRS_BEFORE_COORD_STARTS:SLEEP@5000',
               'fetch_rows_timeout_ms': '1000'}
    exec_resp = self.execute_statement(statement, conf_overlay=overlay)
    HS2TestSuite.check_response(exec_resp)

    # While the coordinator is still starting, the fetch should time out and
    # report STILL_EXECUTING with no result rows.
    fetch_req = TCLIService.TFetchResultsReq(
        operationHandle=exec_resp.operationHandle, maxRows=1024)
    fetch_resp = self.hs2_client.FetchResults(fetch_req)
    HS2TestSuite.check_response(fetch_resp,
        expected_status_code=TCLIService.TStatusCode.STILL_EXECUTING_STATUS)
    assert fetch_resp.hasMoreRows
    assert not fetch_resp.results

    op_status_resp = self.wait_for_operation_state(
        exec_resp.operationHandle,
        TCLIService.TOperationState.FINISHED_STATE)
    HS2TestSuite.check_response(op_status_resp)

    # Once the query has finished, every remaining row should be fetchable.
    FetchTimeoutUtils.fetch_num_rows(self.hs2_client,
        exec_resp.operationHandle, num_rows, statement)
# Esempio n. 29
# 0
    def test_expire_disconnected_session(self):
        """Verifies the interaction between idle_session_timeout and
    disconnected_session_timeout: an idle session with no connections is first
    expired, then eventually closed."""
        # Shut down the default test clients first so their sessions cannot expire
        # mid-test and perturb the metric values asserted on below.
        self.close_impala_clients()
        first_impalad = self.cluster.get_first_impalad()

        hs2_conn = HS2TestSuite()
        hs2_conn.setup()
        # Open a session, then drop the only connection referencing it.
        open_resp = hs2_conn.hs2_client.OpenSession(TCLIService.TOpenSessionReq())
        HS2TestSuite.check_response(open_resp)
        hs2_conn.teardown()

        # The idle session timeout should be hit first, so the session will be expired.
        first_impalad.service.wait_for_metric_value(
            "impala-server.num-sessions-expired", 1)
        # The session should eventually be closed by the disconnected timeout.
        first_impalad.service.wait_for_metric_value(
            "impala-server.num-open-hiveserver2-sessions", 0)
# Esempio n. 30
# 0
  @staticmethod
  def fetch_num_rows(hs2_client, op_handle, num_rows, statement):
    """Fetch the specified number of rows in the given op_handle and validate that the
    number of rows returned matches the expected number of rows. If the op_handle does
    not return the expected number of rows within a timeout, a Timeout is raised.
    'statement' is only used to build the Timeout error message.

    Fixes: added @staticmethod (the method takes no self/cls and is invoked as
    FetchTimeoutUtils.fetch_num_rows(client, ...) elsewhere in this file, which fails
    under Python 2 unbound-method rules without it); fetch immediately instead of
    sleeping 0.5s before the first attempt; dropped the dead assert that followed
    the raise."""
    # The timeout (seconds) to wait for fetch requests to fetch all rows.
    timeout = 30

    start_time = time()
    num_fetched = 0

    # Fetch results until either the timeout is hit or all rows have been fetched.
    # Only sleep between attempts, so a query whose rows are already buffered
    # does not pay a fixed 0.5s latency on the first fetch.
    while num_fetched < num_rows and time() - start_time < timeout:
      fetch_results_resp = hs2_client.FetchResults(
          TCLIService.TFetchResultsReq(operationHandle=op_handle,
              maxRows=num_rows - num_fetched))
      HS2TestSuite.check_response(fetch_results_resp)
      num_fetched += HS2TestSuite.get_num_rows(fetch_results_resp.results)
      if num_fetched < num_rows:
        sleep(0.5)
    if num_fetched != num_rows:
      raise Timeout("Query {0} did not fetch all results within the timeout {1}"
                    .format(statement, timeout))
# Esempio n. 31
# 0
    def test_rows_materialized_counters(self):
        """Test that NumRowsFetched is updated even when a fetch request is served by the
    results cache, and that rows are only materialized when first created (e.g. not when
    they are served from the cache).

    Fixes: the request was missing both sessionHandle and the confOverlay dict
    initialization — TExecuteStatementReq.confOverlay defaults to None, so the
    subscript assignment below would raise TypeError; every sibling test in this file
    sets both before use."""
        num_rows = 10
        statement = "SELECT * FROM functional.alltypes LIMIT {0}".format(
            num_rows)
        # Expected runtime-profile counter lines, e.g. "NumRowsFetched: 10 (10)".
        num_rows_fetched = "NumRowsFetched: {0} ({0})"
        num_rows_fetched_from_cache = "NumRowsFetchedFromCache: {0} ({0})"

        # Execute the query with the results cache enabled.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        # confOverlay must be initialized before option values can be assigned into it.
        execute_statement_req.confOverlay = dict()
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = str(num_rows)
        execute_statement_req.statement = statement
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(execute_statement_resp)

        # Fetch all rows from the query and verify they have been cached.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, num_rows)
        self.__verify_num_cached_rows(num_rows)

        # Get the runtime profile and validate that NumRowsFetched equals the number
        # of rows fetched by the query.
        profile = self.__get_runtime_profile(
            execute_statement_resp.operationHandle)
        assert num_rows_fetched.format(num_rows) in profile

        # Fetch all rows again (served from the cache this time) and confirm that both
        # NumRowsFetched and NumRowsFetchedFromCache reflect the cached re-fetch.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_FIRST, num_rows)
        profile = self.__get_runtime_profile(
            execute_statement_resp.operationHandle)
        assert num_rows_fetched.format(num_rows) in profile
        assert num_rows_fetched_from_cache.format(num_rows) in profile
# Esempio n. 32
# 0
  def test_multiple_fetch_multiple_batches_timeout(self):
    """Exercises FETCH_ROWS_TIMEOUT_MS with a per-batch DELAY debug action and a very
    low fetch timeout. Fetches in a loop until all rows arrive and checks that several
    fetch round-trips were needed (i.e. individual fetches timed out early). Similar
    to test_fetch_multiple_batches_timeout but expects multiple timed-out fetches."""
    total_rows = 100
    statement = "select * from functional.alltypes limit {0}".format(total_rows)
    exec_resp = self.execute_statement(statement,
        conf_overlay={'batch_size': '1', 'debug_action': '0:GETNEXT:DELAY',
                      'fetch_rows_timeout_ms': '1'})
    HS2TestSuite.check_response(exec_resp)

    # Wait for rows to be available for fetch.
    op_status_resp = self.wait_for_operation_state(
        exec_resp.operationHandle,
        TCLIService.TOperationState.FINISHED_STATE, timeout=30)
    HS2TestSuite.check_response(op_status_resp)

    # Overall budget (seconds) for draining all rows across repeated fetches.
    timeout = 30
    start_time = time()
    rows_fetched = 0
    fetch_attempts = 0

    # Keep fetching until every row has arrived or the overall budget is spent.
    while rows_fetched != total_rows and time() - start_time < timeout:
      sleep(0.5)
      fetch_resp = self.hs2_client.FetchResults(TCLIService.TFetchResultsReq(
          operationHandle=exec_resp.operationHandle, maxRows=total_rows))
      HS2TestSuite.check_response(fetch_resp)
      rows_fetched += HS2TestSuite.get_num_rows(fetch_resp.results)
      fetch_attempts += 1
    if rows_fetched != total_rows:
      raise Timeout("Query {0} did not fetch all results within the timeout {1}"
                    .format(statement, timeout))
    # The query produces 100 RowBatches, each delayed before reaching the
    # PlanRootSink. Every fetch asked for all 100 rows, but with a 1ms fetch
    # timeout each request returns early, so draining the result set must have
    # required several round-trips.
    assert fetch_attempts >= 5
# Esempio n. 33
# 0
  def test_result_metadata_v1(self):
    """Validates result-set metadata for primitive types, DECIMAL precision/scale,
    and CHAR/VARCHAR max lengths."""
    exec_req = TCLIService.TExecuteStatementReq()
    exec_req.sessionHandle = self.session_handle

    def run_and_get_columns(sql, expected_num_cols):
      # Runs 'sql', fetches one row, asserts the column count, and returns the
      # operation handle plus the column metadata.
      exec_req.statement = sql
      resp = self.hs2_client.ExecuteStatement(exec_req)
      HS2TestSuite.check_response(resp)
      fetched = self.fetch_at_most(resp.operationHandle,
                                   TCLIService.TFetchOrientation.FETCH_NEXT, 1, 1)
      assert len(fetched.results.rows) == 1
      cols = self.result_metadata(resp.operationHandle).schema.columns
      assert len(cols) == expected_num_cols
      return resp.operationHandle, cols

    # Verify all primitive types in the alltypes table.
    handle, cols = run_and_get_columns(
        "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 1", 13)
    expected_primitives = [
        TTypeId.INT_TYPE, TTypeId.BOOLEAN_TYPE, TTypeId.TINYINT_TYPE,
        TTypeId.SMALLINT_TYPE, TTypeId.INT_TYPE, TTypeId.BIGINT_TYPE,
        TTypeId.FLOAT_TYPE, TTypeId.DOUBLE_TYPE, TTypeId.STRING_TYPE,
        TTypeId.STRING_TYPE, TTypeId.TIMESTAMP_TYPE, TTypeId.INT_TYPE,
        TTypeId.INT_TYPE]
    for col, expected_type in zip(cols, expected_primitives):
      self.__verify_primitive_type(expected_type, col)
    self.close(handle)

    # Verify the result metadata for the DECIMAL type: the result has 2 columns,
    # the first is decimal(9,0) and the second is decimal(10,5).
    handle, cols = run_and_get_columns(
        "SELECT d1,d5 FROM functional.decimal_tbl ORDER BY d1 LIMIT 1", 2)
    for col, (precision, scale) in zip(cols, [(9, 0), (10, 5)]):
      self.__verify_primitive_type(TTypeId.DECIMAL_TYPE, col)
      self.__verify_decimal_precision_scale(col, precision, scale)
    self.close(handle)

    # Verify the result metadata for the CHAR/VARCHAR types.
    handle, cols = run_and_get_columns(
        "SELECT * FROM functional.chars_tiny ORDER BY cs LIMIT 1", 3)
    expected_chars = [(TTypeId.CHAR_TYPE, 5), (TTypeId.CHAR_TYPE, 140),
                      (TTypeId.VARCHAR_TYPE, 32)]
    for col, (type_id, max_len) in zip(cols, expected_chars):
      self.__verify_primitive_type(type_id, col)
      self.__verify_char_max_len(col, max_len)
    self.close(handle)
# Esempio n. 34
# 0
  def test_invalid_secret(self):
    """Test that the FetchResults, GetResultSetMetadata and CloseOperation APIs validate
    the session secret."""
    exec_resp = self.hs2_client.ExecuteStatement(TCLIService.TExecuteStatementReq(
        self.session_handle, "select 'something something'"))
    HS2TestSuite.check_response(exec_resp)

    good_handle = exec_resp.operationHandle
    bad_handle = create_op_handle_without_secret(good_handle)

    # Requests carrying the secret-less handle must be rejected as invalid queries
    # (no-ops), leaving the real operation untouched.
    HS2TestSuite.check_invalid_query(self.hs2_client.FetchResults(
        TCLIService.TFetchResultsReq(operationHandle=bad_handle, maxRows=1024)),
        expect_legacy_err=True)
    HS2TestSuite.check_invalid_query(self.hs2_client.GetResultSetMetadata(
        TCLIService.TGetResultSetMetadataReq(operationHandle=bad_handle)),
        expect_legacy_err=True)
    HS2TestSuite.check_invalid_query(self.hs2_client.CloseOperation(
        TCLIService.TCloseOperationReq(operationHandle=bad_handle)),
        expect_legacy_err=True)

    # The good handle must still be usable for the same three APIs.
    HS2TestSuite.check_response(self.hs2_client.FetchResults(
        TCLIService.TFetchResultsReq(operationHandle=good_handle, maxRows=1024)))
    HS2TestSuite.check_response(self.hs2_client.GetResultSetMetadata(
        TCLIService.TGetResultSetMetadataReq(operationHandle=good_handle)))
    HS2TestSuite.check_response(self.hs2_client.CloseOperation(
        TCLIService.TCloseOperationReq(operationHandle=good_handle)))
# Esempio n. 35
# 0
 def check_hs2_shutdown_error(hs2_response):
     """Asserts that 'hs2_response' carries ERROR_STATUS with the shutdown prefix."""
     HS2TestSuite.check_response(
         hs2_response, TCLIService.TStatusCode.ERROR_STATUS, SHUTDOWN_ERROR_PREFIX)
# Esempio n. 36
# 0
class TestGracefulShutdown(CustomClusterTestSuite, HS2TestSuite):
    """Tests for graceful shutdown of impalads: idle-cluster shutdown, executor
    shutdown/restart, and coordinator shutdown paths."""
    # Grace period (seconds) passed via --shutdown_grace_period_s in test_shutdown_idle.
    IDLE_SHUTDOWN_GRACE_PERIOD_S = 1
    # NOTE(review): presumably the signal used to trigger a graceful shutdown of an
    # impalad process — its use is not visible in this chunk; confirm against callers.
    IMPALA_SHUTDOWN_SIGNAL = signal.SIGRTMIN

    @classmethod
    def get_workload(cls):
        """Returns the workload name used by this test suite."""
        workload = 'functional-query'
        return workload

    @SkipIfGCS.jira(reason="IMPALA-10562")
    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--shutdown_grace_period_s={grace_period} \
          --hostname={hostname}".format(
            grace_period=IDLE_SHUTDOWN_GRACE_PERIOD_S,
            hostname=socket.gethostname()))
    def test_shutdown_idle(self):
        """Test that idle impalads shut down in a timely manner after the shutdown grace
    period elapses."""
        impalad1 = psutil.Process(self.cluster.impalads[0].get_pid())
        impalad2 = psutil.Process(self.cluster.impalads[1].get_pid())
        impalad3 = psutil.Process(self.cluster.impalads[2].get_pid())

        # Test that a failed shut down from a bogus host or port fails gracefully.
        ex = self.execute_query_expect_failure(
            self.client, ":shutdown('e6c00ca5cd67b567eb96c6ecfb26f05')")
        assert "Could not find IPv4 address for:" in str(ex)
        ex = self.execute_query_expect_failure(
            self.client, ":shutdown('localhost:100000')")
        assert "invalid port:" in str(ex)
        assert (
            "This may be because the port specified is wrong.") not in str(ex)

        # Test that pointing to the wrong thrift service (the HS2 port) fails gracefully-ish.
        thrift_port = 21051  # HS2 port.
        ex = self.execute_query_expect_failure(
            self.client, ":shutdown('localhost:{0}')".format(thrift_port))
        assert ("failed with error 'RemoteShutdown() RPC failed") in str(ex)
        assert ("This may be because the port specified is wrong.") in str(ex)

        # Test RPC error handling with debug action.
        # NOTE(review): ports 27000-27002 look like the minicluster's per-impalad
        # KRPC ports — confirm against the test cluster configuration.
        ex = self.execute_query_expect_failure(
            self.client,
            ":shutdown('localhost:27001')",
            query_options={'debug_action': 'CRS_SHUTDOWN_RPC:FAIL'})
        assert 'Rpc to 127.0.0.1:27001 failed with error \'Debug Action: ' \
            'CRS_SHUTDOWN_RPC:FAIL' in str(ex)

        # Test remote shutdown.
        LOG.info("Start remote shutdown {0}".format(time.time()))
        self.execute_query_expect_success(self.client,
                                          ":shutdown('localhost:27001')",
                                          query_options={})

        # Remote shutdown does not require statestore.
        self.cluster.statestored.kill()
        self.cluster.statestored.wait_for_exit()
        self.execute_query_expect_success(self.client,
                                          ":shutdown('localhost:27002')",
                                          query_options={})

        # Test local shutdown, which should succeed even with injected RPC error.
        LOG.info("Start local shutdown {0}".format(time.time()))
        self.execute_query_expect_success(
            self.client,
            ":shutdown('{0}:27000')".format(socket.gethostname()),
            query_options={'debug_action': 'CRS_SHUTDOWN_RPC:FAIL'})

        # Make sure that the impala daemons exit after the shutdown grace period plus a 10
        # second margin of error.
        start_time = time.time()
        LOG.info("Waiting for impalads to exit {0}".format(start_time))
        impalad1.wait()
        LOG.info("First impalad exited {0}".format(time.time()))
        impalad2.wait()
        LOG.info("Second impalad exited {0}".format(time.time()))
        impalad3.wait()
        LOG.info("Third impalad exited {0}".format(time.time()))
        shutdown_duration = time.time() - start_time
        assert shutdown_duration <= self.IDLE_SHUTDOWN_GRACE_PERIOD_S + 10

    # Grace period and deadline (seconds) used by the executor shutdown tests below.
    EXEC_SHUTDOWN_GRACE_PERIOD_S = 5
    EXEC_SHUTDOWN_DEADLINE_S = 10

    @pytest.mark.execute_serially
    @SkipIfNotHdfsMinicluster.scheduling
    @CustomClusterTestSuite.with_args(
        impalad_args="--shutdown_grace_period_s={grace_period} \
          --shutdown_deadline_s={deadline} \
          --hostname={hostname}".format(
            grace_period=EXEC_SHUTDOWN_GRACE_PERIOD_S,
            deadline=EXEC_SHUTDOWN_DEADLINE_S,
            hostname=socket.gethostname()))
    def test_shutdown_executor(self):
        """Shuts down and restarts an executor with no delay before fetching results;
        see do_test_shutdown_executor for the test body."""
        self.do_test_shutdown_executor(fetch_delay_s=0)

    @pytest.mark.execute_serially
    @SkipIfNotHdfsMinicluster.scheduling
    @CustomClusterTestSuite.with_args(
        impalad_args="--shutdown_grace_period_s={grace_period} \
          --shutdown_deadline_s={deadline} \
          --stress_status_report_delay_ms={status_report_delay_ms} \
          --hostname={hostname}".format(
            grace_period=EXEC_SHUTDOWN_GRACE_PERIOD_S,
            deadline=EXEC_SHUTDOWN_DEADLINE_S,
            status_report_delay_ms=5000,
            hostname=socket.gethostname()))
    def test_shutdown_executor_with_delay(self):
        """Regression test for IMPALA-7931 that adds delays to status reporting and
    to fetching of results to trigger races that previously resulted in query failures.

    Fix: removed a leftover debug statement `print self.exploration_strategy`, which
    printed the bound-method object (missing call parentheses) and used Python-2-only
    print-statement syntax."""
        # Only run under the exhaustive exploration strategy; skip otherwise.
        if self.exploration_strategy() != 'exhaustive':
            pytest.skip()
        self.do_test_shutdown_executor(fetch_delay_s=5)

    def do_test_shutdown_executor(self, fetch_delay_s):
        """Implementation of test that shuts down and then restarts an executor. This should
    not disrupt any queries that start after the shutdown or complete before the shutdown
    time limit. The test is parameterized by 'fetch_delay_s', the amount to delay before
    fetching from the query that must survive shutdown of an executor."""
        # Add sleeps to make sure that the query takes a couple of seconds to execute on the
        # executors.
        QUERY = "select count(*) from functional_parquet.alltypes where sleep(1) = bool_col"
        # Subtle: use a splittable file format like text for lineitem so that each backend
        # is guaranteed to get scan ranges that contain some actual rows. With Parquet on
        # S3, the files get broken into 32MB scan ranges and a backend might get unlucky
        # and only get scan ranges that don't contain the midpoint of any row group, and
        # therefore not actually produce any rows.
        SLOW_QUERY = "select count(*) from tpch.lineitem where sleep(1) = l_orderkey"
        SHUTDOWN_EXEC2 = ": shutdown('localhost:27001')"

        # Run this query before shutdown and make sure that it executes successfully on
        # all executors through the shutdown grace period without disruption.
        before_shutdown_handle = self.__exec_and_wait_until_running(QUERY)

        # Run this query which simulates getting stuck in admission control until after
        # the shutdown grace period expires. This demonstrates that queries don't get
        # cancelled if the cluster membership changes while they're waiting for admission.
        before_shutdown_admission_handle = self.execute_query_async(
            QUERY, {'debug_action': 'AC_BEFORE_ADMISSION:SLEEP@30000'})

        # Shut down and wait for the shutdown state to propagate through statestore.
        # NOTE(review): parse_shutdown_result appears to return
        # (grace, deadline, registered, queries_executing) — inferred from the tuple
        # unpacking further down; confirm against its definition.
        result = self.execute_query_expect_success(self.client, SHUTDOWN_EXEC2)
        assert parse_shutdown_result(result) == ("{0}s000ms".format(
            self.EXEC_SHUTDOWN_GRACE_PERIOD_S), "{0}s000ms".format(
                self.EXEC_SHUTDOWN_DEADLINE_S), "0", "1")

        # Check that the status is reflected on the debug page.
        web_json = self.cluster.impalads[1].service.get_debug_webpage_json("")
        assert web_json.get('is_quiescing', None) is True, web_json
        assert 'shutdown_status' in web_json, web_json

        self.impalad_test_service.wait_for_num_known_live_backends(
            2,
            timeout=self.EXEC_SHUTDOWN_GRACE_PERIOD_S + 5,
            interval=0.2,
            include_shutting_down=False)

        # Run another query, which shouldn't get scheduled on the new executor. We'll let
        # this query continue running through the full shutdown and restart cycle.
        after_shutdown_handle = self.__exec_and_wait_until_running(QUERY)

        # Wait for the impalad to exit, then start it back up and run another query, which
        # should be scheduled on it again.
        self.cluster.impalads[1].wait_for_exit()

        # Finish fetching results from the first query (which will be buffered on the
        # coordinator) after the backend exits. Add a delay before fetching to ensure
        # that the query is not torn down on the coordinator when the failure is
        # detected by the statestore (see IMPALA-7931).
        assert self.__fetch_and_get_num_backends(QUERY,
                                                 before_shutdown_handle,
                                                 delay_s=fetch_delay_s) == 3

        # Confirm that the query stuck in admission succeeded.
        assert self.__fetch_and_get_num_backends(
            QUERY, before_shutdown_admission_handle, timeout_s=30) == 2

        # Start the impalad back up and run another query, which should be scheduled on it
        # again.
        self.cluster.impalads[1].start()
        self.impalad_test_service.wait_for_num_known_live_backends(
            3, timeout=30, interval=0.2, include_shutting_down=False)
        after_restart_handle = self.__exec_and_wait_until_running(QUERY)

        # The query started while the backend was shut down should not run on that backend.
        assert self.__fetch_and_get_num_backends(QUERY,
                                                 after_shutdown_handle) == 2
        assert self.__fetch_and_get_num_backends(QUERY,
                                                 after_restart_handle) == 3

        # Test that a query will fail when the executor shuts down after the limit.
        deadline_expiry_handle = self.__exec_and_wait_until_running(SLOW_QUERY)
        result = self.execute_query_expect_success(self.client, SHUTDOWN_EXEC2)
        assert parse_shutdown_result(result) == ("{0}s000ms".format(
            self.EXEC_SHUTDOWN_GRACE_PERIOD_S), "{0}s000ms".format(
                self.EXEC_SHUTDOWN_DEADLINE_S), "0", "1")
        self.cluster.impalads[1].wait_for_exit()
        self.__check_deadline_expired(SLOW_QUERY, deadline_expiry_handle)

        # Test that we can reduce the deadline after setting it to a high value.
        # Run a query that will fail as a result of the reduced deadline.
        deadline_expiry_handle = self.__exec_and_wait_until_running(SLOW_QUERY)
        SHUTDOWN_EXEC3 = ": shutdown('localhost:27002', {0})"
        VERY_HIGH_DEADLINE = 5000
        HIGH_DEADLINE = 1000
        LOW_DEADLINE = 5
        result = self.execute_query_expect_success(
            self.client, SHUTDOWN_EXEC3.format(HIGH_DEADLINE))
        grace, deadline, _, _ = parse_shutdown_result(result)
        assert grace == "{0}s000ms".format(self.EXEC_SHUTDOWN_GRACE_PERIOD_S)
        assert deadline == "{0}m{1}s".format(HIGH_DEADLINE / 60,
                                             HIGH_DEADLINE % 60)

        # Raising the deadline above its current value must be a no-op.
        result = self.execute_query_expect_success(
            self.client, SHUTDOWN_EXEC3.format(VERY_HIGH_DEADLINE))
        _, deadline, _, _ = parse_shutdown_result(result)
        LOG.info("Deadline is {0}".format(deadline))
        min_string, sec_string = re.match("([0-9]*)m([0-9]*)s",
                                          deadline).groups()
        assert int(min_string) * 60 + int(sec_string) <= HIGH_DEADLINE, \
            "Cannot increase deadline " + deadline

        result = self.execute_query_expect_success(
            self.client, SHUTDOWN_EXEC3.format(LOW_DEADLINE))
        _, deadline, _, queries_executing = parse_shutdown_result(result)
        assert deadline == "{0}s000ms".format(LOW_DEADLINE)
        assert int(
            queries_executing) > 0, "Slow query should still be running."
        self.cluster.impalads[2].wait_for_exit()
        self.__check_deadline_expired(SLOW_QUERY, deadline_expiry_handle)

    # Grace period and deadline (seconds) used by the coordinator shutdown test below.
    COORD_SHUTDOWN_GRACE_PERIOD_S = 5
    COORD_SHUTDOWN_DEADLINE_S = 120

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--shutdown_grace_period_s={grace_period} \
          --shutdown_deadline_s={deadline} \
          --hostname={hostname}".format(
            grace_period=COORD_SHUTDOWN_GRACE_PERIOD_S,
            deadline=COORD_SHUTDOWN_DEADLINE_S,
            hostname=socket.gethostname()),
        default_query_options=[("num_scanner_threads", "1")])
    @needs_session(TCLIService.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6,
                   close_session=False)
    def test_shutdown_coordinator(self):
        """Test that shuts down the coordinator. Running queries should finish but new
    requests should be rejected."""
        # Start a query running. This should complete successfully and keep the coordinator
        # up until it finishes. We set NUM_SCANNER_THREADS=1 above to make the runtime more
        # predictable.
        SLOW_QUERY = """select * from tpch_parquet.lineitem where sleep(1) < l_orderkey"""
        SHUTDOWN = ": shutdown()"
        SHUTDOWN_ERROR_PREFIX = 'Server is being shut down:'

        before_shutdown_handle = self.__exec_and_wait_until_running(SLOW_QUERY)
        before_shutdown_hs2_handle = self.execute_statement(
            SLOW_QUERY).operationHandle

        # Shut down the coordinator. Operations that start after this point should fail.
        result = self.execute_query_expect_success(self.client, SHUTDOWN)
        grace, deadline, registered, _ = parse_shutdown_result(result)
        assert grace == "{0}s000ms".format(self.COORD_SHUTDOWN_GRACE_PERIOD_S)
        # NOTE(review): the assert message "4" below looks like a copy/scrape artifact —
        # confirm against the upstream source before relying on it.
        assert deadline == "{0}m".format(self.COORD_SHUTDOWN_DEADLINE_S /
                                         60), "4"
        assert registered == "3"

        # Expect that the beeswax shutdown error occurs when calling fn()
        # (Python 2 'except X, e' syntax — this file targets Python 2.)
        def expect_beeswax_shutdown_error(fn):
            try:
                fn()
            except ImpalaBeeswaxException, e:
                assert SHUTDOWN_ERROR_PREFIX in str(e)

        expect_beeswax_shutdown_error(lambda: self.client.execute("select 1"))
        expect_beeswax_shutdown_error(
            lambda: self.client.execute_async("select 1"))

        # Test that the HS2 shutdown error occurs for various HS2 operations.
        self.execute_statement("select 1", None,
                               TCLIService.TStatusCode.ERROR_STATUS,
                               SHUTDOWN_ERROR_PREFIX)

        # Asserts that an HS2 response carries ERROR_STATUS with the shutdown prefix.
        def check_hs2_shutdown_error(hs2_response):
            HS2TestSuite.check_response(hs2_response,
                                        TCLIService.TStatusCode.ERROR_STATUS,
                                        SHUTDOWN_ERROR_PREFIX)

        check_hs2_shutdown_error(
            self.hs2_client.OpenSession(TCLIService.TOpenSessionReq()))
        check_hs2_shutdown_error(
            self.hs2_client.GetInfo(
                TCLIService.TGetInfoReq(
                    self.session_handle,
                    TCLIService.TGetInfoType.CLI_MAX_DRIVER_CONNECTIONS)))
        check_hs2_shutdown_error(
            self.hs2_client.GetTypeInfo(
                TCLIService.TGetTypeInfoReq(self.session_handle)))
        check_hs2_shutdown_error(
            self.hs2_client.GetCatalogs(
                TCLIService.TGetCatalogsReq(self.session_handle)))
        check_hs2_shutdown_error(
            self.hs2_client.GetSchemas(
                TCLIService.TGetSchemasReq(self.session_handle)))
        check_hs2_shutdown_error(
            self.hs2_client.GetTables(
                TCLIService.TGetTablesReq(self.session_handle)))
        check_hs2_shutdown_error(
            self.hs2_client.GetTableTypes(
                TCLIService.TGetTableTypesReq(self.session_handle)))
        check_hs2_shutdown_error(
            self.hs2_client.GetColumns(
                TCLIService.TGetColumnsReq(self.session_handle)))
        check_hs2_shutdown_error(
            self.hs2_client.GetFunctions(
                TCLIService.TGetFunctionsReq(self.session_handle,
                                             functionName="")))

        # Operations on running HS2 query still work.
        self.fetch_until(before_shutdown_hs2_handle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 10)
        HS2TestSuite.check_response(
            self.hs2_client.CancelOperation(
                TCLIService.TCancelOperationReq(before_shutdown_hs2_handle)))
        HS2TestSuite.check_response(
            self.hs2_client.CloseOperation(
                TCLIService.TCloseOperationReq(before_shutdown_hs2_handle)))

        # Make sure that the beeswax query is still executing, then close it to allow the
        # coordinator to shut down.
        self.impalad_test_service.wait_for_query_state(
            self.client,
            before_shutdown_handle,
            self.client.QUERY_STATES['FINISHED'],
            timeout=20)
        self.client.close_query(before_shutdown_handle)
        self.cluster.impalads[0].wait_for_exit()
 def check_hs2_shutdown_error(hs2_response):
   """Asserts that the HS2 response failed with the coordinator-shutdown error."""
   HS2TestSuite.check_response(
       hs2_response, TCLIService.TStatusCode.ERROR_STATUS, SHUTDOWN_ERROR_PREFIX)
# Example 38 (separator from the scraped listing; original text: "Esempio n. 38" / "0")
  def test_query_stmts(self):
    """Tests Impala's limited support for the FETCH_FIRST fetch orientation for queries.
    Impala permits FETCH_FIRST for a particular query iff result caching is enabled
    via the 'impala.resultset.cache.size' confOverlay option. FETCH_FIRST will succeed as
    long as all previously fetched rows fit into the bounded result cache.
    Regardless of whether a FETCH_FIRST succeeds or not, clients may always resume
    fetching with FETCH_NEXT.
    """
    # Negative tests for the result caching option.
    self.__test_invalid_result_caching("SELECT COUNT(*) FROM functional.alltypes")

    # Test that FETCH_NEXT without result caching succeeds and FETCH_FIRST fails.
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.confOverlay = dict()
    execute_statement_req.statement =\
      "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)
    for i in xrange(1, 5):
      # Fetch 10 rows with the FETCH_NEXT orientation. The query returns 30 rows,
      # so the fourth fetch hits end-of-stream and returns 0 rows.
      expected_num_rows = 10
      if i == 4:
        expected_num_rows = 0
      self.fetch(execute_statement_resp.operationHandle,
                 TCLIService.TFetchOrientation.FETCH_NEXT, 10, expected_num_rows)
      # Fetch 10 rows with the FETCH_FIRST orientation, expecting an error.
      # After a failed FETCH_FIRST, the client can still resume FETCH_NEXT.
      self.fetch_fail(execute_statement_resp.operationHandle,
                      TCLIService.TFetchOrientation.FETCH_FIRST,
                      "Restarting of fetch requires enabling of query result caching")
    self.__verify_num_cached_rows(0)
    self.close(execute_statement_resp.operationHandle)

    # Basic test of FETCH_FIRST where the entire result set is cached, and we repeatedly
    # fetch all results.
    execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "30"
    execute_statement_req.statement =\
      "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    # Fix: validate the execute response, consistent with the first statement above.
    HS2TestSuite.check_response(execute_statement_resp)
    for _ in xrange(1, 5):
      self.fetch(execute_statement_resp.operationHandle,
                 TCLIService.TFetchOrientation.FETCH_FIRST, 30)
      self.__verify_num_cached_rows(30)
    self.close(execute_statement_resp.operationHandle)

    # Test FETCH_NEXT and FETCH_FIRST where the entire result set does not fit into
    # the cache. FETCH_FIRST will succeed as long as the fetched results
    # fit into the cache.
    execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "29"
    execute_statement_req.statement =\
      "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    # Fix: validate the execute response (was previously unchecked).
    HS2TestSuite.check_response(execute_statement_resp)
    # Fetch 10 rows. They fit in the result cache.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_NEXT, 10)
    self.__verify_num_cached_rows(10)
    # Restart the fetch and expect success.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_FIRST, 10)
    # Fetch 10 more rows. The result cache has 20 rows total now.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_NEXT, 10)
    self.__verify_num_cached_rows(20)
    # Restart the fetch and expect success.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_FIRST, 10)
    self.__verify_num_cached_rows(20)
    # Fetch 10 more rows from the cache.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_NEXT, 10)
    self.__verify_num_cached_rows(20)
    # This fetch exhausts the result cache (31st row requested > 29-row limit).
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_NEXT, 10)
    self.__verify_num_cached_rows(0)
    # Since the cache is exhausted, FETCH_FIRST will fail.
    self.fetch_fail(execute_statement_resp.operationHandle,
                    TCLIService.TFetchOrientation.FETCH_FIRST,
                    "The query result cache exceeded its limit of 29 rows. "
                    "Restarting the fetch is not possible")
    self.__verify_num_cached_rows(0)
    # This fetch should succeed but return 0 rows because the stream is eos.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_NEXT, 10, 0)
    self.__verify_num_cached_rows(0)
    self.close(execute_statement_resp.operationHandle)

    # Test that FETCH_FIRST serves results from the cache as well as the query
    # coordinator in a single fetch request.
    execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "29"
    execute_statement_req.statement =\
      "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    # Fix: validate the execute response (was previously unchecked).
    HS2TestSuite.check_response(execute_statement_resp)
    # Fetch 7 rows. They fit in the result cache.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_NEXT, 7)
    self.__verify_num_cached_rows(7)
    # Restart the fetch asking for 12 rows, 7 of which are served from the cache and 5
    # from the coordinator. The result cache should have 12 rows total now.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_FIRST, 12)
    self.__verify_num_cached_rows(12)
    # Restart the fetch asking for 40 rows. We expect 30 results returned and that the
    # cache is exhausted.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_FIRST, 40, 30)
    self.__verify_num_cached_rows(0)
    # Fetch next should succeed and return 0 rows (eos).
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_NEXT, 7, 0)
    self.__verify_num_cached_rows(0)
    # Since the cache is exhausted, FETCH_FIRST will fail.
    self.fetch_fail(execute_statement_resp.operationHandle,
                    TCLIService.TFetchOrientation.FETCH_FIRST,
                    "The query result cache exceeded its limit of 29 rows. "
                    "Restarting the fetch is not possible")
    self.__verify_num_cached_rows(0)
    self.close(execute_statement_resp.operationHandle)

    # Test that resuming FETCH_NEXT after a failed FETCH_FIRST works.
    execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "10"
    execute_statement_req.statement =\
      "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    # Fix: validate the execute response (was previously unchecked).
    HS2TestSuite.check_response(execute_statement_resp)
    # Fetch 9 rows. They fit in the result cache.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_NEXT, 9)
    self.__verify_num_cached_rows(9)
    # Fetch 9 rows. Cache is exhausted now (18 > 10-row limit).
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_NEXT, 9)
    self.__verify_num_cached_rows(0)
    # Restarting the fetch should fail.
    self.fetch_fail(execute_statement_resp.operationHandle,
                    TCLIService.TFetchOrientation.FETCH_FIRST,
                    "The query result cache exceeded its limit of 10 rows. "
                    "Restarting the fetch is not possible")
    self.__verify_num_cached_rows(0)
    # Resuming FETCH_NEXT should succeed. There are 12 remaining rows to fetch.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_NEXT, 100, 12)
    self.__verify_num_cached_rows(0)
    self.close(execute_statement_resp.operationHandle)
# Example 39 (separator from the scraped listing; original text: "Esempio n. 39" / "0")
  def test_result_metadata_v1(self):
    """Validates the HS2 result-set metadata (column count, types, and type
    qualifiers) for queries covering primitive, DECIMAL, CHAR/VARCHAR and DATE
    columns."""

    def run_and_get_columns(stmt):
      # Executes 'stmt', checks that exactly one row is returned, and yields
      # the (operation handle, result schema columns) pair for verification.
      req = TCLIService.TExecuteStatementReq()
      req.sessionHandle = self.session_handle
      req.statement = stmt
      resp = self.hs2_client.ExecuteStatement(req)
      HS2TestSuite.check_response(resp)
      fetched = self.fetch_at_most(resp.operationHandle,
                                   TCLIService.TFetchOrientation.FETCH_NEXT, 1, 1)
      assert len(fetched.results.rows) == 1
      metadata_resp = self.result_metadata(resp.operationHandle)
      return resp.operationHandle, metadata_resp.schema.columns

    # Verify all primitive types in the alltypes table.
    handle, cols = run_and_get_columns(
        "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 1")
    alltypes_expected = [
        TTypeId.INT_TYPE, TTypeId.BOOLEAN_TYPE, TTypeId.TINYINT_TYPE,
        TTypeId.SMALLINT_TYPE, TTypeId.INT_TYPE, TTypeId.BIGINT_TYPE,
        TTypeId.FLOAT_TYPE, TTypeId.DOUBLE_TYPE, TTypeId.STRING_TYPE,
        TTypeId.STRING_TYPE, TTypeId.TIMESTAMP_TYPE, TTypeId.INT_TYPE,
        TTypeId.INT_TYPE]
    assert len(cols) == 13
    for expected_type, col in zip(alltypes_expected, cols):
      self.__verify_primitive_type(expected_type, col)
    self.close(handle)

    # Verify the result metadata for the DECIMAL type. The result has 2 columns,
    # the first is decimal(9,0) and the second is decimal(10,5).
    handle, cols = run_and_get_columns(
        "SELECT d1,d5 FROM functional.decimal_tbl ORDER BY d1 LIMIT 1")
    assert len(cols) == 2
    for col, (precision, scale) in zip(cols, [(9, 0), (10, 5)]):
      self.__verify_primitive_type(TTypeId.DECIMAL_TYPE, col)
      self.__verify_decimal_precision_scale(col, precision, scale)
    self.close(handle)

    # Verify the result metadata for the CHAR/VARCHAR types, including their
    # maximum-length qualifiers.
    handle, cols = run_and_get_columns(
        "SELECT * FROM functional.chars_tiny ORDER BY cs LIMIT 1")
    assert len(cols) == 3
    char_expected = [(TTypeId.CHAR_TYPE, 5), (TTypeId.CHAR_TYPE, 140),
                     (TTypeId.VARCHAR_TYPE, 32)]
    for col, (expected_type, max_len) in zip(cols, char_expected):
      self.__verify_primitive_type(expected_type, col)
      self.__verify_char_max_len(col, max_len)
    self.close(handle)

    # Verify the result metadata for the DATE type.
    handle, cols = run_and_get_columns(
        "SELECT * FROM functional.date_tbl ORDER BY date_col LIMIT 1")
    assert len(cols) == 3
    date_expected = [TTypeId.INT_TYPE, TTypeId.DATE_TYPE, TTypeId.DATE_TYPE]
    for expected_type, col in zip(date_expected, cols):
      self.__verify_primitive_type(expected_type, col)
    self.close(handle)
# Example 40 (separator from the scraped listing; original text: "Esempio n. 40" / "0")
  def test_non_query_stmts(self):
    """Tests Impala's limited support for the FETCH_FIRST fetch orientation for
    non-query stmts that return a result set, such as SHOW, COMPUTE STATS, etc.
    The results of non-query statements are always cached entirely, and therefore,
    the cache can never be exhausted, i.e., FETCH_FIRST should always succeed.
    However, we only allow FETCH_FIRST on non-query stmts if query caching was enabled
    by the client for consistency. We use a 'show stats' stmt as a representative
    of these types of non-query stmts.
    """
    # Negative tests for the result caching option.
    self.__test_invalid_result_caching("show table stats functional.alltypes")

    # Test that FETCH_NEXT without result caching succeeds and FETCH_FIRST fails.
    # The show stmt returns exactly 25 results.
    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.confOverlay = dict()
    execute_statement_req.statement = "show table stats functional.alltypes"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)
    for i in xrange(1, 5):
      # Fetch 10 rows with the FETCH_NEXT orientation. With 25 total rows, the
      # third fetch returns only 5 rows and the fourth returns 0 (eos).
      expected_num_rows = 10
      if i == 3:
        expected_num_rows = 5
      if i == 4:
        expected_num_rows = 0
      self.fetch(execute_statement_resp.operationHandle,
                 TCLIService.TFetchOrientation.FETCH_NEXT, 10, expected_num_rows)
      # Fetch 10 rows with the FETCH_FIRST orientation, expecting an error.
      # After a failed FETCH_FIRST, the client can still resume FETCH_NEXT.
      self.fetch_fail(execute_statement_resp.operationHandle,
                      TCLIService.TFetchOrientation.FETCH_FIRST,
                      "Restarting of fetch requires enabling of query result caching")
      # The results of non-query stmts are not counted as 'cached'.
      self.__verify_num_cached_rows(0)

    # Tests that FETCH_FIRST always succeeds as long as result caching is enabled.
    # The show stmt returns exactly 25 results. The cache cannot be exhausted.
    execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "1"
    execute_statement_req.statement = "show table stats functional.alltypes"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)
    for _ in xrange(1, 5):
      self.fetch(execute_statement_resp.operationHandle,
                 TCLIService.TFetchOrientation.FETCH_FIRST, 30, 25)
    # The results of non-query stmts are not counted as 'cached'.
    self.__verify_num_cached_rows(0)

    # Test combinations of FETCH_FIRST and FETCH_NEXT.
    # The show stmt returns exactly 25 results.
    execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "1"
    execute_statement_req.statement = "show table stats functional.alltypes"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp)
    # Fetch 10 rows.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_NEXT, 10)
    # Restart the fetch asking for 20 rows.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_FIRST, 20)
    # FETCH_NEXT asking for 100 rows. There are only 5 remaining rows.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_NEXT, 100, 5)
    # Restart the fetch asking for 5 rows.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_FIRST, 5)
    # FETCH_NEXT asking for 100 rows. There are only 20 remaining rows.
    # NOTE(review): the operation handle is not closed here, unlike the other
    # tests — presumably session teardown cleans it up; confirm.
    self.fetch(execute_statement_resp.operationHandle,
               TCLIService.TFetchOrientation.FETCH_NEXT, 100, 20)
# Example 41 (separator from the scraped listing; original text: "Esempio n. 41" / "0")
    def test_non_query_stmts(self):
        """Tests Impala's limited support for the FETCH_FIRST fetch orientation for
        non-query stmts that return a result set, such as SHOW, COMPUTE STATS, etc.
        The results of non-query statements are always cached entirely, and therefore,
        the cache can never be exhausted, i.e., FETCH_FIRST should always succeed.
        However, we only allow FETCH_FIRST on non-query stmts if query caching was
        enabled by the client for consistency. We use a 'show stats' stmt as a
        representative of these types of non-query stmts.
        """
        # Negative tests for the result caching option.
        self.__test_invalid_result_caching(
            "show table stats functional.alltypes")

        # Test that FETCH_NEXT without result caching succeeds and FETCH_FIRST fails.
        # The show stmt returns exactly 25 results.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.confOverlay = dict()
        execute_statement_req.statement = "show table stats functional.alltypes"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(execute_statement_resp)
        for i in xrange(1, 5):
            # Fetch 10 rows with the FETCH_NEXT orientation. With 25 total rows,
            # the third fetch returns only 5 rows and the fourth returns 0 (eos).
            expected_num_rows = 10
            if i == 3:
                expected_num_rows = 5
            if i == 4:
                expected_num_rows = 0
            self.fetch_until(execute_statement_resp.operationHandle,
                             TCLIService.TFetchOrientation.FETCH_NEXT, 10,
                             expected_num_rows)
            # Fetch 10 rows with the FETCH_FIRST orientation, expecting an error.
            # After a failed FETCH_FIRST, the client can still resume FETCH_NEXT.
            self.fetch_fail(
                execute_statement_resp.operationHandle,
                TCLIService.TFetchOrientation.FETCH_FIRST,
                "Restarting of fetch requires enabling of query result caching"
            )
            # The results of non-query stmts are not counted as 'cached'.
            self.__verify_num_cached_rows(0)

        # Tests that FETCH_FIRST always succeeds as long as result caching is enabled.
        # The show stmt returns exactly 25 results. The cache cannot be exhausted.
        execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "1"
        execute_statement_req.statement = "show table stats functional.alltypes"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(execute_statement_resp)
        for _ in xrange(1, 5):
            self.fetch_until(execute_statement_resp.operationHandle,
                             TCLIService.TFetchOrientation.FETCH_FIRST, 30, 25)
        # The results of non-query stmts are not counted as 'cached'.
        self.__verify_num_cached_rows(0)

        # Test combinations of FETCH_FIRST and FETCH_NEXT.
        # The show stmt returns exactly 25 results.
        execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "1"
        execute_statement_req.statement = "show table stats functional.alltypes"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(execute_statement_resp)
        # Fetch 10 rows.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 10)
        # Restart the fetch asking for 20 rows.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_FIRST, 20)
        # FETCH_NEXT asking for 100 rows. There are only 5 remaining rows.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 100, 5)
        # Restart the fetch asking for 5 rows.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_FIRST, 5)
        # FETCH_NEXT asking for 100 rows. There are only 20 remaining rows.
        # NOTE(review): the operation handle is not closed here, unlike the other
        # tests — presumably session teardown cleans it up; confirm.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 100, 20)
# Example 42 (separator from the scraped listing; original text: "Esempio n. 42" / "0")
    def run_query_stmts_test(self):
        """Tests Impala's limited support for the FETCH_FIRST fetch orientation for
        queries. Impala permits FETCH_FIRST for a particular query iff result caching
        is enabled via the 'impala.resultset.cache.size' confOverlay option.
        FETCH_FIRST will succeed as long as all previously fetched rows fit into the
        bounded result cache. Regardless of whether a FETCH_FIRST succeeds or not,
        clients may always resume fetching with FETCH_NEXT.
        """
        # Negative tests for the result caching option.
        self.__test_invalid_result_caching(
            "SELECT COUNT(*) FROM functional.alltypes")
        # Test that FETCH_NEXT without result caching succeeds and FETCH_FIRST fails.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.confOverlay = dict()
        execute_statement_req.statement =\
          "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(execute_statement_resp)
        for i in xrange(1, 5):
            # Fetch 10 rows with the FETCH_NEXT orientation. The query returns 30
            # rows, so the fourth fetch hits end-of-stream and returns 0 rows.
            expected_num_rows = 10
            if i == 4:
                expected_num_rows = 0
            self.fetch_until(execute_statement_resp.operationHandle,
                             TCLIService.TFetchOrientation.FETCH_NEXT, 10,
                             expected_num_rows)
            # Fetch 10 rows with the FETCH_FIRST orientation, expecting an error.
            # After a failed FETCH_FIRST, the client can still resume FETCH_NEXT.
            self.fetch_fail(
                execute_statement_resp.operationHandle,
                TCLIService.TFetchOrientation.FETCH_FIRST,
                "Restarting of fetch requires enabling of query result caching"
            )
        self.__verify_num_cached_rows(0)
        self.close(execute_statement_resp.operationHandle)

        # Basic test of FETCH_FIRST where the entire result set is cached, and we
        # repeatedly fetch all results.
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = "30"
        execute_statement_req.statement =\
          "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        # Fix: validate the execute response, consistent with the first statement.
        HS2TestSuite.check_response(execute_statement_resp)
        for _ in xrange(1, 5):
            self.fetch_until(execute_statement_resp.operationHandle,
                             TCLIService.TFetchOrientation.FETCH_FIRST, 30)
            self.__verify_num_cached_rows(30)
        self.close(execute_statement_resp.operationHandle)

        # Test FETCH_NEXT and FETCH_FIRST where the entire result set does not fit
        # into the cache. FETCH_FIRST will succeed as long as the fetched results
        # fit into the cache.
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = "29"
        execute_statement_req.statement =\
          "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        # Fix: validate the execute response (was previously unchecked).
        HS2TestSuite.check_response(execute_statement_resp)
        # Fetch 10 rows. They fit in the result cache.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 10)
        self.__verify_num_cached_rows(10)
        # Restart the fetch and expect success.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_FIRST, 10)
        # Fetch 10 more rows. The result cache has 20 rows total now.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 10)
        self.__verify_num_cached_rows(20)
        # Restart the fetch and expect success.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_FIRST, 10)
        self.__verify_num_cached_rows(20)
        # Fetch 10 more rows from the cache.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 10)
        self.__verify_num_cached_rows(20)
        # This fetch exhausts the result cache (31st row requested > 29-row limit).
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 10)
        self.__verify_num_cached_rows(0)
        # Since the cache is exhausted, FETCH_FIRST will fail.
        self.fetch_fail(
            execute_statement_resp.operationHandle,
            TCLIService.TFetchOrientation.FETCH_FIRST,
            "The query result cache exceeded its limit of 29 rows. "
            "Restarting the fetch is not possible")
        self.__verify_num_cached_rows(0)
        # This fetch should succeed but return 0 rows because the stream is eos.
        self.fetch_at_most(execute_statement_resp.operationHandle,
                           TCLIService.TFetchOrientation.FETCH_NEXT, 10, 0)
        self.__verify_num_cached_rows(0)
        self.close(execute_statement_resp.operationHandle)

        # Test that FETCH_FIRST serves results from the cache as well as the query
        # coordinator in a single fetch request.
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = "29"
        execute_statement_req.statement =\
          "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        # Fix: validate the execute response (was previously unchecked).
        HS2TestSuite.check_response(execute_statement_resp)
        # Fetch 7 rows. They fit in the result cache.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 7)
        self.__verify_num_cached_rows(7)
        # Restart the fetch asking for 12 rows, 7 of which are served from the cache
        # and 5 from the coordinator. The result cache should have 12 rows total now.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_FIRST, 12)
        self.__verify_num_cached_rows(12)
        # Restart the fetch asking for 40 rows. We expect 30 results returned and
        # that the cache is exhausted.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_FIRST, 40, 30)
        self.__verify_num_cached_rows(0)
        # Fetch next should succeed and return 0 rows (eos).
        self.fetch_at_most(execute_statement_resp.operationHandle,
                           TCLIService.TFetchOrientation.FETCH_NEXT, 7, 0)
        self.__verify_num_cached_rows(0)
        # Since the cache is exhausted, FETCH_FIRST will fail.
        self.fetch_fail(
            execute_statement_resp.operationHandle,
            TCLIService.TFetchOrientation.FETCH_FIRST,
            "The query result cache exceeded its limit of 29 rows. "
            "Restarting the fetch is not possible")
        self.__verify_num_cached_rows(0)
        self.close(execute_statement_resp.operationHandle)

        # Test that resuming FETCH_NEXT after a failed FETCH_FIRST works.
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = "10"
        execute_statement_req.statement =\
          "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        # Fix: validate the execute response (was previously unchecked).
        HS2TestSuite.check_response(execute_statement_resp)
        # Fetch 9 rows. They fit in the result cache.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 9)
        self.__verify_num_cached_rows(9)
        # Fetch 9 rows. Cache is exhausted now (18 > 10-row limit).
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 9)
        self.__verify_num_cached_rows(0)
        # Restarting the fetch should fail.
        self.fetch_fail(
            execute_statement_resp.operationHandle,
            TCLIService.TFetchOrientation.FETCH_FIRST,
            "The query result cache exceeded its limit of 10 rows. "
            "Restarting the fetch is not possible")
        self.__verify_num_cached_rows(0)
        # Resuming FETCH_NEXT should succeed. There are 12 remaining rows to fetch.
        self.fetch_until(execute_statement_resp.operationHandle,
                         TCLIService.TFetchOrientation.FETCH_NEXT, 100, 12)
        self.__verify_num_cached_rows(0)
        self.close(execute_statement_resp.operationHandle)