Example #1
 def hash_result_impl():
   result_log_filename = None
   try:
     with self._open_result_file(query, query_id) as result_log:
       result_log.write(query.sql)
       result_log.write("\n")
       result_log_filename = result_log.name
       current_thread().result = 1
       while should_continue.value:
         LOG.debug(
             "Fetching result for query with id %s",
             op_handle_to_query_id(
                 cursor._last_operation.handle if cursor._last_operation else None))
         rows = cursor.fetchmany(self.BATCH_SIZE)
         if not rows:
           LOG.debug(
               "No more results for query with id %s",
               op_handle_to_query_id(
                   cursor._last_operation.handle if cursor._last_operation else None))
           return
         for row in rows:
           current_thread().result = _add_row_to_hash(row, current_thread().result)
           if result_log:
             result_log.write(",".join([str(val) for val in row]))
             result_log.write("\thash=")
             result_log.write(str(current_thread().result))
             result_log.write("\n")
   except Exception as e:
     current_thread().error = e
   finally:
     # Only retain result files with incorrect results.
     if (result_log_filename is not None and
         current_thread().error is None and
         current_thread().result == query.result_hash):
       os.remove(result_log_filename)
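
The _add_row_to_hash() helper is not shown in this example. A minimal sketch of what it might look like, reconstructed from the inline hashing logic in Example #2 below (the stand-in value for None and the float rounding are taken from that example; this is an assumption, not the verbatim helper):

from sys import maxint  # Python 2; on Python 3 use sys.maxsize

def _add_row_to_hash(row, curr_hash):
  # Fold each value of 'row' into the running hash. Weighting by column
  # position (idx + 1) makes the hash sensitive to column order while the
  # outer sum stays independent of row order.
  for idx, val in enumerate(row):
    if val is None:
      # hash(None) is not stable across runs; substitute a fixed value.
      val = 38463209
    elif isinstance(val, float):
      # Round floats so insignificant trailing digits don't affect the hash.
      sval = "%f" % val
      dot_idx = sval.find(".")
      val = round(val, 6 - dot_idx)
    curr_hash += (idx + 1) * hash(val)
    # Keep the running value bounded so the arithmetic stays fast.
    curr_hash %= maxint
  return curr_hash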
Example #2
 def hash_result_impl():
     result_log = None
     try:
         file_name = '_'.join(
             [query.logical_query_id,
              query_id.replace(":", "_")])
         if query.result_hash is None:
             file_name += "_initial"
         file_name += "_results.txt"
         result_log = open(
             os.path.join(self.results_dir, RESULT_HASHES_DIR,
                          file_name), "w")
         result_log.write(query.sql)
         result_log.write("\n")
         current_thread().result = 1
         while should_continue.value:
             LOG.debug(
                 "Fetching result for query with id %s",
                 op_handle_to_query_id(
                     cursor._last_operation.handle
                     if cursor._last_operation else None))
             rows = cursor.fetchmany(self.BATCH_SIZE)
             if not rows:
                 LOG.debug(
                     "No more results for query with id %s",
                     op_handle_to_query_id(
                         cursor._last_operation.handle
                         if cursor._last_operation else None))
                 return
             for row in rows:
                 for idx, val in enumerate(row):
                     if val is None:
                         # The hash() of None can change from run to run since it's based on
                         # a memory address. A chosen value will be used instead.
                         val = 38463209
                     elif isinstance(val, float):
                         # Floats returned by Impala may not be deterministic;
                         # the trailing insignificant digits may differ. Only
                         # the first 6 significant digits are kept after rounding.
                         sval = "%f" % val
                         dot_idx = sval.find(".")
                         val = round(val, 6 - dot_idx)
                     current_thread().result += (idx + 1) * hash(val)
                     # Take the result modulo maxint to keep it "small"; otherwise
                     # the math ops can be slow, since Python integers have
                     # arbitrary precision.
                     current_thread().result %= maxint
                     if result_log:
                         result_log.write(str(val))
                         result_log.write("\t")
                         result_log.write(str(current_thread().result))
                         result_log.write("\n")
     except Exception as e:
         current_thread().error = e
     finally:
         if result_log is not None:
             result_log.close()
             # Only retain result files with incorrect results.
             if (current_thread().error is None
                     and current_thread().result == query.result_hash):
                 os.remove(result_log.name)
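
Because each row's contribution is added into the running total (with all intermediate values reduced modulo maxint), the final hash is independent of the order in which rows arrive. A quick check, assuming the _add_row_to_hash() sketch shown after Example #1:

rows = [(1, "a"), (2, "b"), (3, None)]
h_forward = 1
for row in rows:
  h_forward = _add_row_to_hash(row, h_forward)
h_reverse = 1
for row in reversed(rows):
  h_reverse = _add_row_to_hash(row, h_reverse)
# Modular addition commutes, so fetching rows in a different order
# yields the same hash.
assert h_forward == h_reverse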
Example #3
  def _fetch_and_hash_result(self, cursor, timeout_unix_time, query):
    """Fetches results from 'cursor' and returns a hash that is independent of row order.
    Raises QueryTimeout() if we couldn't fetch all rows from the query before time()
    reaches 'timeout_unix_time'.
    'query' is only used for debug logging purposes (if the result is not as expected,
    a log file will be left in RESULTS_DIR for investigation).
    """
    query_id = op_handle_to_query_id(cursor._last_operation.handle if
                                     cursor._last_operation else None)

    # A value of 1 indicates that the hash thread should continue to work.
    should_continue = Value("i", 1)

    def hash_result_impl():
      result_log_filename = None
      try:
        with self._open_result_file(query, query_id) as result_log:
          result_log.write(query.sql)
          result_log.write("\n")
          result_log_filename = result_log.name
          current_thread().result = 1
          while should_continue.value:
            LOG.debug(
                "Fetching result for query with id %s",
                op_handle_to_query_id(
                    cursor._last_operation.handle if cursor._last_operation else None))
            rows = cursor.fetchmany(self.BATCH_SIZE)
            if not rows:
              LOG.debug(
                  "No more results for query with id %s",
                  op_handle_to_query_id(
                      cursor._last_operation.handle if cursor._last_operation else None))
              return
            for row in rows:
              current_thread().result = _add_row_to_hash(row, current_thread().result)
              if result_log:
                result_log.write(",".join([str(val) for val in row]))
                result_log.write("\thash=")
                result_log.write(str(current_thread().result))
                result_log.write("\n")
      except Exception as e:
        current_thread().error = e
      finally:
        # Only retain result files with incorrect results.
        if (result_log_filename is not None and
            current_thread().error is None and
            current_thread().result == query.result_hash):
          os.remove(result_log_filename)

    # TODO: IMPALA-6326: don't fork a thread here
    hash_thread = create_and_start_daemon_thread(
        hash_result_impl, "Fetch Results %s" % query_id)
    hash_thread.join(max(timeout_unix_time - time(), 0))
    if hash_thread.is_alive():
      should_continue.value = 0
      raise QueryTimeout()
    if hash_thread.error:
      raise hash_thread.error
    return hash_thread.result
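
create_and_start_daemon_thread() is another helper that is not shown. A plausible sketch, inferred from how the thread is used above (the 'error' and 'result' attributes are read by the caller, so they are pre-initialized here; this is an assumption about the helper, not its actual source):

from threading import Thread

def create_and_start_daemon_thread(fn, name):
  thread = Thread(target=fn, name=name)
  # Pre-set the attributes the caller reads, in case the target never runs
  # far enough to assign them via current_thread().
  thread.error = None
  thread.result = None
  # A daemon thread won't keep the process alive if a fetch blocks forever.
  thread.daemon = True
  thread.start()
  return thread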
Example #4
  def _fetch_and_hash_result(self, cursor, timeout_unix_time, query):
    """Fetches results from 'cursor' and returns a hash that is independent of row order.
    Raises QueryTimeout() if we couldn't fetch all rows from the query before time()
    reaches 'timeout_unix_time'.
    'query' is only used for debug logging purposes (if the result is not as expected,
    a log file will be left in RESULTS_DIR for investigation).
    """
    query_id = op_handle_to_query_id(cursor._last_operation.handle if
                                     cursor._last_operation else None)

    result_log_filename = None
    curr_hash = 1
    try:
      with self._open_result_file(query, query_id) as result_log:
        result_log.write(query.sql)
        result_log.write("\n")
        result_log_filename = result_log.name
        while True:
          # Check for timeout before each fetch call. Fetching can block indefinitely,
          # e.g. if the query is a selective scan, so we may miss the timeout, but we
          # will live with that limitation for now.
          # TODO: IMPALA-8289: use a non-blocking fetch when support is added
          if time() >= timeout_unix_time:
            LOG.debug("Hit timeout before fetch for query with id {0} {1} >= {2}".format(
                query_id, time(), timeout_unix_time))
            raise QueryTimeout()
          LOG.debug("Fetching result for query with id {0}".format(query_id))
          rows = cursor.fetchmany(self.BATCH_SIZE)
          if not rows:
            LOG.debug("No more results for query with id {0}".format(query_id))
            break
          for row in rows:
            curr_hash = _add_row_to_hash(row, curr_hash)
            if result_log:
              result_log.write(",".join([str(val) for val in row]))
              result_log.write("\thash=")
              result_log.write(str(curr_hash))
              result_log.write("\n")
    except Exception:
      # Don't retain result files for failed queries.
      if result_log_filename is not None:
        os.remove(result_log_filename)
      raise

    # Only retain result files with incorrect results.
    if result_log_filename is not None and curr_hash == query.result_hash:
      os.remove(result_log_filename)
    return curr_hash
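
This example (like #1 and #3) relies on self._open_result_file(), which is not shown. A sketch reconstructed from the inline file-naming logic in Example #2; the context-manager form is an assumption, but the naming scheme and the RESULT_HASHES_DIR constant come from that example:

import os
from contextlib import contextmanager

@contextmanager
def _open_result_file(self, query, query_id):
  # Same naming scheme as the inline version in Example #2: one file per
  # query run, marked "_initial" when no expected hash exists yet.
  file_name = "_".join([query.logical_query_id, query_id.replace(":", "_")])
  if query.result_hash is None:
    file_name += "_initial"
  file_name += "_results.txt"
  result_log = open(
      os.path.join(self.results_dir, RESULT_HASHES_DIR, file_name), "w")
  try:
    yield result_log
  finally:
    result_log.close()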
Example #5
  def run_query(self, query, mem_limit_mb, run_set_up=False,
                timeout_secs=maxint, should_cancel=False, retain_profile=False):
    """Run a query and return an execution report. If 'run_set_up' is True, set up sql
    will be executed before the main query. This should be the case during the binary
    search phase of the stress test.
    If 'should_cancel' is True, don't get the query profile for timed out queries because
    the query was purposely cancelled by setting the query timeout too short to complete,
    rather than having some problem that needs to be investigated.
    """
    if not self.impalad_conn:
      raise Exception("connect() must first be called")

    timeout_unix_time = time() + timeout_secs
    report = QueryReport(query)
    try:
      with self.impalad_conn.cursor() as cursor:
        start_time = time()
        self._set_db_and_options(cursor, query, run_set_up, mem_limit_mb, timeout_secs)
        error = None
        try:
          cursor.execute_async(
              "/* Mem: %s MB. Coordinator: %s. */\n"
              % (mem_limit_mb, self.impalad.host_name) + query.sql)
          report.query_id = op_handle_to_query_id(cursor._last_operation.handle if
                                                  cursor._last_operation else None)
          LOG.debug("Query id is %s", report.query_id)
          if not self._wait_until_fetchable(cursor, report, timeout_unix_time,
                                            should_cancel):
            return report

          if query.query_type == QueryType.SELECT:
            try:
              report.result_hash = self._fetch_and_hash_result(cursor, timeout_unix_time,
                                                               query)
              if (retain_profile or
                  query.result_hash and report.result_hash != query.result_hash):
                fetch_and_set_profile(cursor, report)
            except QueryTimeout:
              # TODO: IMPALA-6326: remove this cancel, which can race with the thread
              # in _fetch_and_hash_result() and cause crashes and other errors.
              self._cancel(cursor, report)
              return report
          else:
            # If query is in error state, this will raise an exception
            cursor._wait_to_finish()
        except Exception as error:
          report.query_id = op_handle_to_query_id(cursor._last_operation.handle if
                                                  cursor._last_operation else None)
          LOG.debug("Error running query with id %s: %s", report.query_id, error)
          self._check_for_memory_errors(report, cursor, error)
        if report.has_query_error():
          return report
        report.runtime_secs = time() - start_time
        if cursor.execution_failed() or self.check_if_mem_was_spilled:
          fetch_and_set_profile(cursor, report)
          report.mem_was_spilled = any([
              pattern.search(report.profile) is not None
              for pattern in QueryRunner.SPILLED_PATTERNS])
          report.not_enough_memory = "Memory limit exceeded" in report.profile
    except Exception as error:
      # A mem limit error would have been caught above, no need to check for that here.
      report.other_error = error
    return report
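
QueryReport is defined elsewhere in the stress test. A minimal sketch of the fields that run_query() reads and writes above (the attribute names are taken from the code; the has_query_error() logic is an assumption):

class QueryReport(object):
  """Outcome of a single run_query() call (sketch)."""

  def __init__(self, query):
    self.query = query
    self.query_id = None
    self.result_hash = None
    self.runtime_secs = None
    self.profile = None
    self.mem_was_spilled = False
    self.not_enough_memory = False
    self.timed_out = False
    self.other_error = None

  def has_query_error(self):
    # Assumption: any recorded error condition counts as a query error.
    return self.not_enough_memory or self.other_error is not None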
Example #6
  def run_query(self, query, mem_limit_mb, run_set_up=False,
                timeout_secs=maxint, cancel_mech=None, retain_profile=False):
    """Run a query and return an execution report. If 'run_set_up' is True, set up sql
    will be executed before the main query. This should be the case during the binary
    search phase of the stress test. 'cancel_mech' is optionally a CancelMechanism
    value that should be used to cancel the query after timeout_secs.
    If 'cancel_mech' is provided, don't get the query profile for timed out queries
    because the query was purposely cancelled, rather than having some problem that needs
    investigation.
    """
    assert self.impalad_conn, "connect() must be called before run_query()"
    assert cancel_mech is None or cancel_mech in CancelMechanism.ALL_MECHS

    timeout_unix_time = time() + timeout_secs
    report = QueryReport(query)
    try:
      with self.impalad_conn.cursor() as cursor:
        start_time = time()
        self._set_db_and_options(cursor, query, run_set_up, mem_limit_mb, timeout_secs,
            cancel_mech)
        error = None
        try:
          cursor.execute_async(
              "/* Mem: %s MB. Coordinator: %s. */\n"
              % (mem_limit_mb, self.impalad.host_name) + query.sql)
          report.query_id = op_handle_to_query_id(cursor._last_operation.handle if
                                                  cursor._last_operation else None)
          LOG.debug("Query id is %s", report.query_id)
          self._wait_until_fetchable(cursor, report, timeout_unix_time)
          if query.query_type == QueryType.SELECT:
            report.result_hash = self._fetch_and_hash_result(cursor, timeout_unix_time,
                                                             query)
          else:
            # If query is in error state, this will raise an exception
            cursor._wait_to_finish()
          if (retain_profile or
             query.result_hash and report.result_hash != query.result_hash):
            fetch_and_set_profile(cursor, report)
        except QueryTimeout:
          if not cancel_mech:
            fetch_and_set_profile(cursor, report)
          # Cancel from this client if a) we hit the timeout unexpectedly or b) we're
          # deliberately cancelling the query via the client.
          if not cancel_mech or cancel_mech == CancelMechanism.VIA_CLIENT:
            self._cancel(cursor, report)
          else:
            # Wait until the query is cancelled by a different mechanism.
            self._wait_until_cancelled(cursor, report.query_id)
          report.timed_out = True
          return report
        except Exception as error:
          report.query_id = op_handle_to_query_id(cursor._last_operation.handle if
                                                  cursor._last_operation else None)
          LOG.debug("Error running query with id %s: %s", report.query_id, error)
          if (cancel_mech == CancelMechanism.VIA_OPTION and
                is_exec_time_limit_error(error)):
            # Timeout via query option counts as a timeout.
            report.timed_out = True
          else:
            self._check_for_memory_errors(report, cursor, error)
        if report.has_query_error():
          return report
        report.runtime_secs = time() - start_time
        if cursor.execution_failed() or self.check_if_mem_was_spilled:
          fetch_and_set_profile(cursor, report)
          report.mem_was_spilled = any([
              pattern.search(report.profile) is not None
              for pattern in QueryRunner.SPILLED_PATTERNS])
          # TODO: is this needed? Memory errors are generally caught by the try/except.
          report.not_enough_memory = "Memory limit exceeded" in report.profile
    except Exception as error:
      # A mem limit error would have been caught above, no need to check for that here.
      LOG.debug("Caught error", error)
      report.other_error = error
    return report
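
CancelMechanism is referenced but not defined in this example. A plausible sketch based on the members used above (VIA_CLIENT, VIA_OPTION and ALL_MECHS; the string values are assumptions):

class CancelMechanism(object):
  # Cancel by calling cancel() from this client.
  VIA_CLIENT = "VIA_CLIENT"
  # Cancel server-side via a query option such as an exec time limit.
  VIA_OPTION = "VIA_OPTION"
  ALL_MECHS = [VIA_CLIENT, VIA_OPTION]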
Example #7
    def _hash_result(self, cursor, timeout_unix_time, query):
        """Returns a hash that is independent of row order. 'query' is only used for
        debug logging purposes (if the result is not as expected, a log file will be
        left for investigation).
        """
        query_id = op_handle_to_query_id(
            cursor._last_operation.handle if cursor._last_operation else None)

        # A value of 1 indicates that the hash thread should continue to work.
        should_continue = Value("i", 1)

        def hash_result_impl():
            result_log = None
            try:
                file_name = '_'.join(
                    [query.logical_query_id,
                     query_id.replace(":", "_")])
                if query.result_hash is None:
                    file_name += "_initial"
                file_name += "_results.txt"
                result_log = open(
                    os.path.join(self.results_dir, RESULT_HASHES_DIR,
                                 file_name), "w")
                result_log.write(query.sql)
                result_log.write("\n")
                current_thread().result = 1
                while should_continue.value:
                    LOG.debug(
                        "Fetching result for query with id %s",
                        op_handle_to_query_id(
                            cursor._last_operation.handle
                            if cursor._last_operation else None))
                    rows = cursor.fetchmany(self.BATCH_SIZE)
                    if not rows:
                        LOG.debug(
                            "No more results for query with id %s",
                            op_handle_to_query_id(
                                cursor._last_operation.handle
                                if cursor._last_operation else None))
                        return
                    for row in rows:
                        for idx, val in enumerate(row):
                            if val is None:
                                # The hash() of None can change from run to run since it's based on
                                # a memory address. A chosen value will be used instead.
                                val = 38463209
                            elif isinstance(val, float):
                                # Floats returned by Impala may not be deterministic;
                                # the trailing insignificant digits may differ. Only
                                # the first 6 significant digits are kept after rounding.
                                sval = "%f" % val
                                dot_idx = sval.find(".")
                                val = round(val, 6 - dot_idx)
                            current_thread().result += (idx + 1) * hash(val)
                            # Take the result modulo maxint to keep it "small";
                            # otherwise the math ops can be slow, since Python
                            # integers have arbitrary precision.
                            current_thread().result %= maxint
                            if result_log:
                                result_log.write(str(val))
                                result_log.write("\t")
                                result_log.write(str(current_thread().result))
                                result_log.write("\n")
            except Exception as e:
                current_thread().error = e
            finally:
                if result_log is not None:
                    result_log.close()
                    # Only retain result files with incorrect results.
                    if (current_thread().error is None
                            and current_thread().result == query.result_hash):
                        os.remove(result_log.name)

        hash_thread = create_and_start_daemon_thread(
            hash_result_impl, "Fetch Results %s" % query_id)
        hash_thread.join(max(timeout_unix_time - time(), 0))
        if hash_thread.is_alive():
            should_continue.value = 0
            raise QueryTimeout()
        if hash_thread.error:
            raise hash_thread.error
        return hash_thread.result
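
The float normalization in the hashing loop keeps roughly six significant digits: "%f" % val fixes the textual representation, find(".") counts the digits before the decimal point, and round(val, 6 - dot_idx) discards the rest. A worked example:

val = 123.4567891
sval = "%f" % val         # "123.456789"
dot_idx = sval.find(".")  # 3 digits before the decimal point
print(round(val, 6 - dot_idx))  # 123.457 -> six significant digits kept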