Ejemplo n.º 1
0
def get_num_cache_requests():
    """Returns the number of outstanding cache requests, measured as the number of
    lines printed by `hdfs cacheadmin -listDirectives -stats`.

    Cache requests are added/dropped/reported with race conditions (see IMPALA-3040),
    so the count is re-read, sleeping between reads, until two consecutive reads agree
    or the maximum number of attempts is used up. In the latter case the most recent
    count is returned."""
    def count_directive_lines():
        # Fails the caller with an assertion if the hdfs command exits non-zero.
        rc, stdout, stderr = exec_process(
            "hdfs cacheadmin -listDirectives -stats")
        assert rc == 0, 'Error executing hdfs cacheadmin: %s %s' % (stdout,
                                                                    stderr)
        return len(stdout.split('\n'))

    # IMPALA-3040: This can take time, especially under slow builds like ASAN.
    poll_interval_s = specific_build_type_timeout(5, slow_build_timeout=20)
    max_num_stabilization_attempts = 10
    num_requests = None
    LOG.info("{0} Entered get_num_cache_requests()".format(time.time()))
    for _ in range(max_num_stabilization_attempts):
        latest = count_directive_lines()
        if latest == num_requests:
            # Two consecutive reads agree: treat the count as stable.
            break
        LOG.info("{0} Waiting to stabilise: num_requests={1} new_requests={2}".
                 format(time.time(), num_requests, latest))
        num_requests = latest
        time.sleep(poll_interval_s)
    LOG.info("{0} Final num requests: {1}".format(time.time(), num_requests))
    return num_requests
Ejemplo n.º 2
0
    def test_restart_statestore(self, cursor):
        """Regression test of IMPALA-6973.

        After the statestore restarts, the metadata should eventually recover after
        being cleared by the new statestore.

        The statestore is restarted, then "describe database functional" is retried
        once per second. The test passes on the first successful execution. It fails
        if the query raises anything other than the expected "Database does not
        exist" analysis error, or if the database is still missing once all retries
        are used up.
        """
        self.cluster.statestored.restart()
        # We need to wait for the impalad to register to the new statestored and for a
        # non-empty catalog update from the new statestored. It cannot be expressed with the
        # existing metrics yet so we wait for some time here.
        wait_time_s = specific_build_type_timeout(60, slow_build_timeout=100)
        sleep(wait_time_s)
        for _ in range(wait_time_s):
            try:
                cursor.execute("describe database functional")
                return
            except HiveServer2Error as e:
                # A missing database only means the catalog has not recovered yet;
                # any other error is a genuine failure.
                assert "AnalysisException: Database does not exist: functional" in e.message,\
                       "Unexpected exception: " + e.message
                sleep(1)
        # Without this the test would pass silently when the metadata never recovers.
        raise AssertionError(
            "Database 'functional' still does not exist after {0} retries following "
            "the statestore restart".format(wait_time_s))
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#

import pytest
import re
import time

from tests.common.environ import specific_build_type_timeout
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIfLocal

# Wait time in milliseconds (per the _MS suffix): 60000 by default, with 100000
# supplied as the slow-build value.
# NOTE(review): presumably specific_build_type_timeout picks the slow-build value on
# slow build types - confirm in tests.common.environ.
WAIT_TIME_MS = specific_build_type_timeout(60000, slow_build_timeout=100000)


@SkipIfLocal.multiple_impalad
class TestRuntimeFilters(ImpalaTestSuite):
    """Runtime filter tests, run against the 'functional-query' workload on every
    table format except HBase."""

    @classmethod
    def get_workload(cls):
        """Returns the name of the workload used by this suite."""
        return 'functional-query'

    @classmethod
    def add_test_dimensions(cls):
        """Adds the parent suite's test dimensions, then constrains the test matrix to
        exclude the HBase file format."""
        super(TestRuntimeFilters, cls).add_test_dimensions()

        def is_not_hbase(v):
            # Runtime filters are disabled on HBase
            return v.get_value('table_format').file_format not in ['hbase']

        cls.ImpalaTestMatrix.add_constraint(is_not_hbase)
Ejemplo n.º 4
0
# The number of queries to submit. The test does not support fewer queries than
# MAX_NUM_CONCURRENT_QUERIES + MAX_NUM_QUEUED_QUERIES to keep some validation logic
# simple.
NUM_QUERIES = [15, 30, 50]

# Whether we will submit queries to all available impalads (in a round-robin fashion)
ROUND_ROBIN_SUBMISSION = [True, False]

# The query pool to use. The impalads should be configured to recognize this
# pool with the parameters below.
POOL_NAME = "default-pool"

# Stress test timeout (seconds). The timeout needs to be significantly higher for
# slow builds like code coverage and ASAN (IMPALA-3790, IMPALA-6241).
STRESS_TIMEOUT = specific_build_type_timeout(60, slow_build_timeout=600)

# The number of queries that can execute concurrently in the pool POOL_NAME.
MAX_NUM_CONCURRENT_QUERIES = 5

# The number of queries that can be queued in the pool POOL_NAME
MAX_NUM_QUEUED_QUERIES = 10

# Mem limit (bytes) used in the mem limit test
MEM_TEST_LIMIT = 12 * 1024 * 1024 * 1024

# Statestored command-line flags that set both the heartbeat frequency and the
# update frequency to STATESTORE_RPC_FREQUENCY_MS.
# NOTE(review): STATESTORE_RPC_FREQUENCY_MS is not assigned in the lines above this
# use - confirm it is defined before this statement runs.
_STATESTORED_ARGS = "-statestore_heartbeat_frequency_ms=%s "\
                    "-statestore_update_frequency_ms=%s" %\
                    (STATESTORE_RPC_FREQUENCY_MS, STATESTORE_RPC_FREQUENCY_MS)

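# Key in the query profile for the query options.

# The number of queries to submit. The test does not support fewer queries than
# MAX_NUM_CONCURRENT_QUERIES + MAX_NUM_QUEUED_QUERIES to keep some validation logic
# simple.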
NUM_QUERIES = [15, 30, 50]

# Whether we will submit queries to all available impalads (in a round-robin fashion)
ROUND_ROBIN_SUBMISSION = [True, False]

# The query pool to use. The impalads should be configured to recognize this
# pool with the parameters below.
POOL_NAME = "default-pool"

# The statestore heartbeat and topic update frequency (ms). Set low for testing.
STATESTORE_RPC_FREQUENCY_MS = 500

# Stress test timeout (seconds). The timeout needs to be significantly higher in code
# coverage builds (IMPALA-3790).
STRESS_TIMEOUT = specific_build_type_timeout(30, code_coverage_build_timeout=600)

# The number of queries that can execute concurrently in the pool POOL_NAME.
MAX_NUM_CONCURRENT_QUERIES = 5

# The number of queries that can be queued in the pool POOL_NAME
MAX_NUM_QUEUED_QUERIES = 10

# Mem limit (bytes) used in the mem limit test
MEM_TEST_LIMIT = 12 * 1024 * 1024 * 1024

# Statestored command-line flags that set both the heartbeat frequency and the
# update frequency to STATESTORE_RPC_FREQUENCY_MS.
_STATESTORED_ARGS = "-statestore_heartbeat_frequency_ms=%s "\
                    "-statestore_update_frequency_ms=%s" %\
                    (STATESTORE_RPC_FREQUENCY_MS, STATESTORE_RPC_FREQUENCY_MS)

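# Key in the query profile for the query options.

# The number of queries to submit. The test does not support fewer queries than
# MAX_NUM_CONCURRENT_QUERIES + MAX_NUM_QUEUED_QUERIES to keep some validation logic
# simple.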
NUM_QUERIES = [15, 30, 50]

# Whether we will submit queries to all available impalads (in a round-robin fashion)
ROUND_ROBIN_SUBMISSION = [True, False]

# The query pool to use. The impalads should be configured to recognize this
# pool with the parameters below.
POOL_NAME = "default-pool"

# The statestore heartbeat and topic update frequency (ms). Set low for testing.
STATESTORE_RPC_FREQUENCY_MS = 500

# Stress test timeout (seconds). The timeout needs to be significantly higher in code
# coverage builds (IMPALA-3790).
STRESS_TIMEOUT = specific_build_type_timeout(30,
                                             code_coverage_build_timeout=600)

# The number of queries that can execute concurrently in the pool POOL_NAME.
MAX_NUM_CONCURRENT_QUERIES = 5

# The number of queries that can be queued in the pool POOL_NAME
MAX_NUM_QUEUED_QUERIES = 10

# Mem limit (bytes) used in the mem limit test
MEM_TEST_LIMIT = 12 * 1024 * 1024 * 1024

# Statestored command-line flags that set both the heartbeat frequency and the
# update frequency to STATESTORE_RPC_FREQUENCY_MS.
_STATESTORED_ARGS = "-statestore_heartbeat_frequency_ms=%s "\
                    "-statestore_update_frequency_ms=%s" %\
                    (STATESTORE_RPC_FREQUENCY_MS, STATESTORE_RPC_FREQUENCY_MS)

# Key in the query profile for the query options.
Ejemplo n.º 7
0
# Default locations and endpoints. HADOOP_CONF_DIR must be set in the environment;
# the Kudu master host and port fall back to local defaults when their environment
# variables are unset.
DEFAULT_HDFS_XML_CONF = os.path.join(
    os.environ['HADOOP_CONF_DIR'], "hdfs-site.xml")
DEFAULT_HIVE_SERVER2 = 'localhost:11050'
DEFAULT_IMPALAD_HS2_PORT = '21050'
DEFAULT_IMPALADS = "localhost:21000,localhost:21001,localhost:21002"
DEFAULT_KUDU_MASTER_HOSTS = os.getenv('KUDU_MASTER_HOSTS', '127.0.0.1')
DEFAULT_KUDU_MASTER_PORT = os.getenv('KUDU_MASTER_PORT', '7051')
DEFAULT_METASTORE_SERVER = 'localhost:9083'
# A namenode address is only built when the filesystem is Isilon; otherwise it
# stays None.
DEFAULT_NAMENODE_ADDR = (
    "{node}:{port}".format(
        node=os.getenv("ISILON_NAMENODE"), port=ISILON_WEBHDFS_PORT)
    if FILESYSTEM == 'isilon' else None)

# Timeout each individual test case after 2 hours, or 4 hours for slow builds
PYTEST_TIMEOUT_S = specific_build_type_timeout(
    2 * 60 * 60, slow_build_timeout=4 * 60 * 60)


def pytest_configure(config):
    """Pytest startup hook.

    Configures logging via configure_logging() and then sets the per-test timeout
    option on the given pytest config to PYTEST_TIMEOUT_S.
    """
    configure_logging()
    options = config.option
    options.timeout = PYTEST_TIMEOUT_S


def configure_logging():
    """Sets up basic logging at INFO level with the LOG_FORMAT format string."""
    # A "--" is used since most of our tests output SQL commands, and it's nice
    # to be able to copy-paste directly from the test output back into a shell
    # to try to reproduce a failure.
    # NOTE(review): the "--" presumably lives in LOG_FORMAT - confirm there.
    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

Ejemplo n.º 8
0
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import pytest
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.environ import specific_build_type_timeout
from tests.common.skip import SkipIfBuildType
from tests.util.filesystem_utils import IS_S3, IS_ADLS, IS_ISILON

# IMPALA-6100: slow build types get extra margin for error, so they use the larger
# SLOW_BUILD_TIMEOUT value instead of the 10000 default.
SLOW_BUILD_TIMEOUT = 20000
DELAY_MS = specific_build_type_timeout(
    10000, slow_build_timeout=SLOW_BUILD_TIMEOUT)
# IMPALA-6381: Isilon can behave as a slow build.
# IMPALA-6811: S3/ADLS can also have a slow scan that requires a longer delay.
# On any of these filesystems the slow-build value is used unconditionally.
if any((IS_S3, IS_ADLS, IS_ISILON)):
    DELAY_MS = SLOW_BUILD_TIMEOUT


@SkipIfBuildType.not_dev_build
class TestExchangeDelays(CustomClusterTestSuite):
    """Tests for handling delays in finding data stream receivers"""
    @classmethod
    def get_workload(cls):
        """Returns the name of the workload used by this suite."""
        return 'functional-query'

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
Ejemplo n.º 9
0
    IMPALA_HOME, "bin/start-impalad.sh -build_type={build_type}".format(
        build_type=options.build_type))
# Start-script paths under IMPALA_HOME with the -build_type flag already appended,
# so each value is a command line rather than a bare file path.
STATE_STORE_PATH = os.path.join(
    IMPALA_HOME, "bin/start-statestored.sh -build_type={build_type}".format(
        build_type=options.build_type))
CATALOGD_PATH = os.path.join(
    IMPALA_HOME, "bin/start-catalogd.sh -build_type={build_type}".format(
        build_type=options.build_type))
# The impalad start command with the -in-process flag appended.
MINI_IMPALA_CLUSTER_PATH = IMPALAD_PATH + " -in-process"

CLUSTER_WAIT_TIMEOUT_IN_SECONDS = 240
# Kills have a timeout to prevent automated scripts from hanging indefinitely.
# It is set to a high value to avoid failing if processes are slow to shut down.
KILL_TIMEOUT_IN_SECONDS = 240
# For build types like ASAN, modify the default Kudu rpc timeout.
# NOTE(review): the unit of 60000 is presumably milliseconds - confirm at the use site.
KUDU_RPC_TIMEOUT = specific_build_type_timeout(0, slow_build_timeout=60000)


def find_user_processes(binaries):
    """Returns an iterator over all processes owned by the current user with a matching
  binary name from the provided list."""
    for pid in psutil.get_pid_list():
        try:
            process = psutil.Process(pid)
            if process.username == getuser() and process.name in binaries:
                yield process
        except KeyError, e:
            if "uid not found" not in str(e):
                raise
        except psutil.NoSuchProcess, e:
            # Ignore the case when a process no longer exists.
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import pytest
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.environ import specific_build_type_timeout
from tests.common.skip import SkipIfBuildType

# Delay in milliseconds: 10000 by default, with 20000 supplied as the slow-build value.
# IMPALA-6100: add additional margin for error for slow build types.
DELAY_MS = specific_build_type_timeout(10000, slow_build_timeout=20000)


@SkipIfBuildType.not_dev_build
class TestExchangeDelays(CustomClusterTestSuite):
    """Tests for handling delays in finding data stream receivers"""
    @classmethod
    def get_workload(cls):
        """Returns the name of the workload used by this suite."""
        return 'functional-query'

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        "--stress_datastream_recvr_delay_ms={0}".format(DELAY_MS) +
        " --datastream_sender_timeout_ms=5000")
    def test_exchange_small_delay(self, vector):
        """Test delays in registering data stream receivers where the first one or two
Ejemplo n.º 11
0
    "bin/start-impalad.sh -build_type={build_type}".format(
        build_type=options.build_type))
# Start-script paths under IMPALA_HOME with the -build_type flag already appended,
# so each value is a command line rather than a bare file path.
STATE_STORE_PATH = os.path.join(IMPALA_HOME,
    "bin/start-statestored.sh -build_type={build_type}".format(
        build_type=options.build_type))
CATALOGD_PATH = os.path.join(IMPALA_HOME,
    "bin/start-catalogd.sh -build_type={build_type}".format(
        build_type=options.build_type))
# The impalad start command with the -in-process flag appended.
MINI_IMPALA_CLUSTER_PATH = IMPALAD_PATH + " -in-process"

CLUSTER_WAIT_TIMEOUT_IN_SECONDS = 240
# Kills have a timeout to prevent automated scripts from hanging indefinitely.
# It is set to a high value to avoid failing if processes are slow to shut down.
KILL_TIMEOUT_IN_SECONDS = 240
# For build types like ASAN, modify the default Kudu rpc timeout.
# NOTE(review): the unit of 60000 is presumably milliseconds - confirm at the use site.
KUDU_RPC_TIMEOUT = specific_build_type_timeout(0, slow_build_timeout=60000)

def find_user_processes(binaries):
  """Returns an iterator over all processes owned by the current user with a matching
  binary name from the provided list.

  Processes that disappear while being inspected are skipped, as are lookups that fail
  with a "uid not found" KeyError. Any other KeyError is re-raised."""
  for pid in psutil.get_pid_list():
    try:
      process = psutil.Process(pid)
      # getuser() stays inside the try block: like the username lookup, it may raise
      # the "uid not found" KeyError handled below.
      if process.username == getuser() and process.name in binaries:
        yield process
    except KeyError as e:
      # NOTE(review): presumably raised when a uid has no user database entry - confirm.
      if "uid not found" not in str(e):
        raise
    except psutil.NoSuchProcess:
      # Ignore the case when a process no longer exists.
      pass