def establish_beeswax_connection(query_config):
  """Establish a connection to the user specified impalad.

  If query_config.impalad has no port ("host" rather than "host:port"), it is
  rewritten in place to "host:DEFAULT_BEESWAX_PORT" before connecting.

  Args:
    query_config (QueryExecConfig): Supplies impalad, use_kerberos, user, password,
      use_ssl and the exec_options applied to the new connection.

  Returns:
    ImpalaBeeswaxClient if the connection succeeds, None otherwise.
  """
  use_kerberos = query_config.use_kerberos
  user = query_config.user
  password = query_config.password
  use_ssl = query_config.use_ssl
  # If the impalad is of the form host, convert it to host:port that the Impala beeswax
  # client accepts.
  if len(query_config.impalad.split(":")) == 1:
    query_config.impalad = "{0}:{1}".format(query_config.impalad, DEFAULT_BEESWAX_PORT)
  try:
    client = ImpalaBeeswaxClient(query_config.impalad, use_kerberos=use_kerberos,
                                 user=user, password=password, use_ssl=use_ssl)
    # Try connect
    client.connect()
    # Set the exec options.
    client.set_query_options(query_config.exec_options)
    LOG.info("Connected to %s" % query_config.impalad)
  except Exception as e:
    # Failures are reported through the log and signalled to the caller with None,
    # as the documented contract states.
    LOG.error("Error connecting: {0}".format(str(e)))
    return None
  # Hand the ready-to-use client back; previously it was created but never returned.
  return client
 def __init__(self, host_port, use_kerberos=False, user=None, password=None,
              use_ssl=False):
     """Build the wrapped ImpalaBeeswaxClient for host_port.

     Only constructs the client and records host_port; connect() is not called here.
     QUERY_STATES is taken from the client's query_states attribute.
     """
     auth_kwargs = dict(user=user, password=password, use_ssl=use_ssl)
     wrapped_client = ImpalaBeeswaxClient(host_port, use_kerberos, **auth_kwargs)
     self.__beeswax_client = wrapped_client
     self.__host_port = host_port
     self.QUERY_STATES = wrapped_client.query_states
Exemple #3
0
def establish_beeswax_connection(query, query_config):
  """Connect to the impalad named by query_config and apply its exec options.

  Args:
    query: Not used by this function.
    query_config (QueryExecConfig): Supplies impalad, use_kerberos and exec_options.

  Returns:
    (boolean, ImpalaBeeswaxClient): (True, client) whenever no exception is raised;
    errors raised by the client propagate to the caller.
  """
  # TODO: Make this generic, for hive etc.
  client_kwargs = {"use_kerberos": query_config.use_kerberos}
  beeswax_client = ImpalaBeeswaxClient(query_config.impalad, **client_kwargs)
  # Connect first, then apply the exec options to the connected client.
  beeswax_client.connect()
  beeswax_client.set_query_options(query_config.exec_options)
  connected_msg = "Connected to %s" % query_config.impalad
  LOG.info(connected_msg)
  return True, beeswax_client
def establish_beeswax_connection(query, query_config):
  """Connect to the impalad named by query_config and apply its exec options.

  When query_config.impalad contains no ":" it is rewritten in place to
  "host:DEFAULT_BEESWAX_PORT", the form the Impala beeswax client accepts.

  Args:
    query: Not used by this function.
    query_config (QueryExecConfig): Supplies impalad, use_kerberos and exec_options.

  Returns:
    (boolean, ImpalaBeeswaxClient): (True, client) whenever no exception is raised;
    errors raised by the client propagate to the caller.
  """
  client_kwargs = {"use_kerberos": query_config.use_kerberos}
  # A bare host has no ":" separator, so the default beeswax port is appended.
  if query_config.impalad.count(":") == 0:
    host_only = query_config.impalad
    query_config.impalad = "{0}:{1}".format(host_only, DEFAULT_BEESWAX_PORT)
  beeswax_client = ImpalaBeeswaxClient(query_config.impalad, **client_kwargs)
  # Connect first, then apply the exec options to the connected client.
  beeswax_client.connect()
  beeswax_client.set_query_options(query_config.exec_options)
  connected_msg = "Connected to %s" % query_config.impalad
  LOG.info(connected_msg)
  return True, beeswax_client
def establish_beeswax_connection(query_config):
    """Establish a connection to the user specified impalad.

    If query_config.impalad has no port ("host" rather than "host:port"), it is
    rewritten in place to "host:DEFAULT_BEESWAX_PORT" before connecting.

    Args:
      query_config (QueryExecConfig): Supplies impalad, use_kerberos and the
        exec_options applied to the new connection.

    Returns:
      ImpalaBeeswaxClient if the connection succeeds, None otherwise.
    """
    use_kerberos = query_config.use_kerberos
    # If the impalad is of the form host, convert it to host:port that the Impala
    # beeswax client accepts.
    if len(query_config.impalad.split(":")) == 1:
        query_config.impalad = "{0}:{1}".format(query_config.impalad,
                                                DEFAULT_BEESWAX_PORT)
    try:
        client = ImpalaBeeswaxClient(query_config.impalad,
                                     use_kerberos=use_kerberos)
        # Try connect
        client.connect()
        # Set the exec options.
        client.set_query_options(query_config.exec_options)
        LOG.info("Connected to %s" % query_config.impalad)
    except Exception as e:
        # Failures are reported through the log and signalled to the caller with
        # None, as the documented contract states.
        LOG.error("Error connecting: {0}".format(str(e)))
        return None
    # Hand the ready-to-use client back; previously it was created but never
    # returned.
    return client
 def client_factory():
     """Yield a connected ImpalaBeeswaxClient and close it afterwards.

     The client is built from options.impalad, options.use_kerberos and
     options.use_ssl, and connected before it is yielded.
     """
     impala_client = ImpalaBeeswaxClient(options.impalad,
                                         use_kerberos=options.use_kerberos,
                                         use_ssl=options.use_ssl)
     impala_client.connect()
     try:
         yield impala_client
     finally:
         # Runs even when the consumer raises at the yield point or closes the
         # generator early, so the connection is not leaked.
         impala_client.close_connection()
Exemple #7
0
def establish_beeswax_connection(query, query_config):
    """Connect to the impalad named by query_config and apply its exec options.

    Args:
      query: Not used by this function.
      query_config (QueryExecConfig): Supplies impalad, use_kerberos and
        exec_options.

    Returns:
      (boolean, ImpalaBeeswaxClient): (True, client) whenever no exception is
      raised; errors raised by the client propagate to the caller.
    """
    # TODO: Make this generic, for hive etc.
    client_kwargs = {"use_kerberos": query_config.use_kerberos}
    beeswax_client = ImpalaBeeswaxClient(query_config.impalad, **client_kwargs)
    # Connect first, then apply the exec options to the connected client.
    beeswax_client.connect()
    beeswax_client.set_query_options(query_config.exec_options)
    connected_msg = "Connected to %s" % query_config.impalad
    LOG.info(connected_msg)
    return True, beeswax_client
Exemple #8
0
def establish_beeswax_connection(query, query_config):
    """Connect to the impalad named by query_config and apply its exec options.

    When query_config.impalad contains no ":" it is rewritten in place to
    "host:DEFAULT_BEESWAX_PORT", the form the Impala beeswax client accepts.

    Args:
      query: Not used by this function.
      query_config (QueryExecConfig): Supplies impalad, use_kerberos and
        exec_options.

    Returns:
      (boolean, ImpalaBeeswaxClient): (True, client) whenever no exception is
      raised; errors raised by the client propagate to the caller.
    """
    client_kwargs = {"use_kerberos": query_config.use_kerberos}
    # A bare host has no ":" separator, so the default beeswax port is appended.
    if query_config.impalad.count(":") == 0:
        host_only = query_config.impalad
        query_config.impalad = "{0}:{1}".format(host_only, DEFAULT_BEESWAX_PORT)
    beeswax_client = ImpalaBeeswaxClient(query_config.impalad, **client_kwargs)
    # Connect first, then apply the exec options to the connected client.
    beeswax_client.connect()
    beeswax_client.set_query_options(query_config.exec_options)
    connected_msg = "Connected to %s" % query_config.impalad
    LOG.info(connected_msg)
    return True, beeswax_client
class BeeswaxConnection(ImpalaConnection):
  """ImpalaConnection subclass that forwards every call to an ImpalaBeeswaxClient.

  Each method logs what it is about to do and then delegates to the wrapped client.
  Operation handle arguments are unwrapped with get_handle() before being passed on.
  """

  def __init__(self, host_port, use_kerberos=False, user=None, password=None,
               use_ssl=False):
    """Create the wrapped client for host_port; connect() is not called here."""
    wrapped_client = ImpalaBeeswaxClient(host_port, use_kerberos, user=user,
                                         password=password, use_ssl=use_ssl)
    self.__beeswax_client = wrapped_client
    self.__host_port = host_port
    self.QUERY_STATES = wrapped_client.query_states

  def set_configuration_option(self, name, value):
    """Set query option name to value, skipping the call if it already matches."""
    current_value = self.__beeswax_client.get_query_option(name)
    # Only set the option if it's not already set to the same value.
    if current_value != value:
      LOG.info('SET %s=%s;' % (name, value))
      self.__beeswax_client.set_query_option(name, value)

  def get_default_configuration(self):
    """Return the client's default configuration as a {key: value} dict."""
    return dict((item.key, item.value)
                for item in self.__beeswax_client.get_default_configuration())

  def clear_configuration(self):
    """Clear all query options, then restore client_identifier when available."""
    self.__beeswax_client.clear_query_options()
    # A hook in conftest sets tests.common.current_node.
    if not hasattr(tests.common, "current_node"):
      return
    self.set_configuration_option("client_identifier", tests.common.current_node)

  def connect(self):
    """Open the connection of the wrapped client."""
    message = "-- connecting to: %s" % self.__host_port
    LOG.info(message)
    self.__beeswax_client.connect()

  # TODO: rename to close_connection
  def close(self):
    """Close the connection of the wrapped client."""
    message = "-- closing connection to: %s" % self.__host_port
    LOG.info(message)
    self.__beeswax_client.close_connection()

  def close_query(self, operation_handle):
    """Close the query identified by operation_handle."""
    LOG.info("-- closing query for operation handle: %s" % operation_handle)
    raw_handle = operation_handle.get_handle()
    self.__beeswax_client.close_query(raw_handle)

  def execute(self, sql_stmt, user=None):
    """Execute sql_stmt through the client and return the client's result."""
    message = "-- executing against %s\n%s;\n" % (self.__host_port, sql_stmt)
    LOG.info(message)
    return self.__beeswax_client.execute(sql_stmt, user=user)

  def execute_async(self, sql_stmt, user=None):
    """Start sql_stmt asynchronously and return an OperationHandle for it."""
    message = "-- executing async: %s\n%s;\n" % (self.__host_port, sql_stmt)
    LOG.info(message)
    return OperationHandle(
        self.__beeswax_client.execute_query_async(sql_stmt, user=user), sql_stmt)

  def cancel(self, operation_handle):
    """Cancel the query identified by operation_handle."""
    LOG.info("-- canceling operation: %s" % operation_handle)
    raw_handle = operation_handle.get_handle()
    return self.__beeswax_client.cancel_query(raw_handle)

  def get_state(self, operation_handle):
    """Return the client's state for the given operation."""
    LOG.info("-- getting state for operation: %s" % operation_handle)
    raw_handle = operation_handle.get_handle()
    return self.__beeswax_client.get_state(raw_handle)

  def get_exec_summary(self, operation_handle):
    """Return the client's exec summary for the given operation."""
    LOG.info("-- getting exec summary operation: %s" % operation_handle)
    raw_handle = operation_handle.get_handle()
    return self.__beeswax_client.get_exec_summary(raw_handle)

  def get_runtime_profile(self, operation_handle):
    """Return the client's runtime profile for the given operation."""
    LOG.info("-- getting runtime profile operation: %s" % operation_handle)
    raw_handle = operation_handle.get_handle()
    return self.__beeswax_client.get_runtime_profile(raw_handle)

  def wait_for_finished_timeout(self, operation_handle, timeout):
    """Delegate to the client's wait_for_finished_timeout with the given timeout."""
    LOG.info("-- waiting for query to reach FINISHED state: %s" % operation_handle)
    raw_handle = operation_handle.get_handle()
    return self.__beeswax_client.wait_for_finished_timeout(raw_handle, timeout)

  def wait_for_admission_control(self, operation_handle):
    """Delegate to the client's wait_for_admission_control for this operation."""
    message = ("-- waiting for completion of the admission control processing "
               "of the query: %s" % operation_handle)
    LOG.info(message)
    raw_handle = operation_handle.get_handle()
    return self.__beeswax_client.wait_for_admission_control(raw_handle)

  def get_admission_result(self, operation_handle):
    """Return the client's admission result for the given operation."""
    LOG.info("-- getting the admission result: %s" % operation_handle)
    raw_handle = operation_handle.get_handle()
    return self.__beeswax_client.get_admission_result(raw_handle)

  def get_log(self, operation_handle):
    """Return the client's log for the given operation."""
    LOG.info("-- getting log for operation: %s" % operation_handle)
    raw_handle = operation_handle.get_handle()
    return self.__beeswax_client.get_log(raw_handle)

  def fetch(self, sql_stmt, operation_handle, max_rows = -1):
    """Fetch results for the operation; max_rows is passed through to the client."""
    LOG.info("-- fetching results from: %s" % operation_handle)
    raw_handle = operation_handle.get_handle()
    return self.__beeswax_client.fetch_results(sql_stmt, raw_handle, max_rows)
Exemple #10
0
class BeeswaxConnection(ImpalaConnection):
    """ImpalaConnection subclass that forwards every call to an ImpalaBeeswaxClient.

    Each method logs what it is about to do and then delegates to the wrapped
    client. Operation handle arguments are unwrapped with get_handle() before
    being passed on.
    """

    def __init__(self, host_port, use_kerberos=False, user=None, password=None,
                 use_ssl=False):
        """Create the wrapped client for host_port; connect() is not called here."""
        wrapped_client = ImpalaBeeswaxClient(host_port, use_kerberos, user=user,
                                             password=password, use_ssl=use_ssl)
        self.__beeswax_client = wrapped_client
        self.__host_port = host_port
        self.QUERY_STATES = wrapped_client.query_states

    def set_configuration_option(self, name, value):
        """Set query option name to value, skipping the call if it already matches."""
        current_value = self.__beeswax_client.get_query_option(name)
        # Only set the option if it's not already set to the same value.
        if current_value != value:
            LOG.info('SET %s=%s;' % (name, value))
            self.__beeswax_client.set_query_option(name, value)

    def get_default_configuration(self):
        """Return the client's default configuration as a {key: value} dict."""
        return dict(
            (item.key, item.value)
            for item in self.__beeswax_client.get_default_configuration())

    def clear_configuration(self):
        """Clear all query options, then restore client_identifier when available."""
        self.__beeswax_client.clear_query_options()
        # A hook in conftest sets tests.common.current_node.
        if not hasattr(tests.common, "current_node"):
            return
        self.set_configuration_option("client_identifier",
                                      tests.common.current_node)

    def connect(self):
        """Open the connection of the wrapped client."""
        message = "-- connecting to: %s" % self.__host_port
        LOG.info(message)
        self.__beeswax_client.connect()

    # TODO: rename to close_connection
    def close(self):
        """Close the connection of the wrapped client."""
        message = "-- closing connection to: %s" % self.__host_port
        LOG.info(message)
        self.__beeswax_client.close_connection()

    def close_query(self, operation_handle):
        """Close the query identified by operation_handle."""
        LOG.info("-- closing query for operation handle: %s" % operation_handle)
        raw_handle = operation_handle.get_handle()
        self.__beeswax_client.close_query(raw_handle)

    def close_dml(self, operation_handle):
        """Close the DML query identified by operation_handle."""
        LOG.info("-- closing DML query for operation handle: %s" % operation_handle)
        raw_handle = operation_handle.get_handle()
        self.__beeswax_client.close_dml(raw_handle)

    def execute(self, sql_stmt, user=None):
        """Log and execute sql_stmt, returning the client's result."""
        message = "-- executing against %s\n" % (self.__host_port)
        LOG.info(message)
        log_sql_stmt(sql_stmt)
        return self.__beeswax_client.execute(sql_stmt, user=user)

    def execute_async(self, sql_stmt, user=None):
        """Start sql_stmt asynchronously and return an OperationHandle for it."""
        message = "-- executing async: %s\n" % (self.__host_port)
        LOG.info(message)
        log_sql_stmt(sql_stmt)
        return OperationHandle(
            self.__beeswax_client.execute_query_async(sql_stmt, user=user),
            sql_stmt)

    def cancel(self, operation_handle):
        """Cancel the query identified by operation_handle."""
        LOG.info("-- canceling operation: %s" % operation_handle)
        raw_handle = operation_handle.get_handle()
        return self.__beeswax_client.cancel_query(raw_handle)

    def get_state(self, operation_handle):
        """Return the client's state for the given operation."""
        LOG.info("-- getting state for operation: %s" % operation_handle)
        raw_handle = operation_handle.get_handle()
        return self.__beeswax_client.get_state(raw_handle)

    def state_is_finished(self, operation_handle):
        """Return whether get_state() equals QUERY_STATES["FINISHED"]."""
        message = "-- checking finished state for operation: {0}".format(
            operation_handle)
        LOG.info(message)
        current_state = self.get_state(operation_handle)
        return current_state == self.QUERY_STATES["FINISHED"]

    def get_exec_summary(self, operation_handle):
        """Return the client's exec summary for the given operation."""
        LOG.info("-- getting exec summary operation: %s" % operation_handle)
        raw_handle = operation_handle.get_handle()
        return self.__beeswax_client.get_exec_summary(raw_handle)

    def get_runtime_profile(self, operation_handle):
        """Return the client's runtime profile for the given operation."""
        LOG.info("-- getting runtime profile operation: %s" % operation_handle)
        raw_handle = operation_handle.get_handle()
        return self.__beeswax_client.get_runtime_profile(raw_handle)

    def wait_for_finished_timeout(self, operation_handle, timeout):
        """Delegate to the client's wait_for_finished_timeout with the timeout."""
        LOG.info("-- waiting for query to reach FINISHED state: %s" %
                 operation_handle)
        raw_handle = operation_handle.get_handle()
        return self.__beeswax_client.wait_for_finished_timeout(raw_handle, timeout)

    def wait_for_admission_control(self, operation_handle):
        """Delegate to the client's wait_for_admission_control for this operation."""
        message = ("-- waiting for completion of the admission control "
                   "processing of the query: %s" % operation_handle)
        LOG.info(message)
        raw_handle = operation_handle.get_handle()
        return self.__beeswax_client.wait_for_admission_control(raw_handle)

    def get_admission_result(self, operation_handle):
        """Return the client's admission result for the given operation."""
        LOG.info("-- getting the admission result: %s" % operation_handle)
        raw_handle = operation_handle.get_handle()
        return self.__beeswax_client.get_admission_result(raw_handle)

    def get_log(self, operation_handle):
        """Return the client's log, looked up by the handle's log_context."""
        LOG.info("-- getting log for operation: %s" % operation_handle)
        log_context = operation_handle.get_handle().log_context
        return self.__beeswax_client.get_log(log_context)

    def fetch(self, sql_stmt, operation_handle, max_rows=-1):
        """Fetch results for the operation; max_rows is passed to the client."""
        LOG.info("-- fetching results from: %s" % operation_handle)
        raw_handle = operation_handle.get_handle()
        return self.__beeswax_client.fetch_results(sql_stmt, raw_handle, max_rows)
Exemple #11
0
class BeeswaxConnection(ImpalaConnection):
    """ImpalaConnection subclass that forwards every call to an ImpalaBeeswaxClient.

    Each method logs what it is about to do and then delegates to the wrapped
    client. Operation handle arguments are unwrapped with get_handle() before
    being passed on.
    """

    def __init__(self,
                 host_port,
                 use_kerberos=False,
                 user=None,
                 password=None,
                 use_ssl=False):
        """Create the wrapped client for host_port; connect() is not called here."""
        self.__beeswax_client = ImpalaBeeswaxClient(host_port,
                                                    use_kerberos,
                                                    user=user,
                                                    password=password,
                                                    use_ssl=use_ssl)
        self.__host_port = host_port
        self.QUERY_STATES = self.__beeswax_client.query_states

    def set_configuration_option(self, name, value):
        """Set query option name to value, skipping the call if it already matches."""
        # Only set the option if it's not already set to the same value.
        if self.__beeswax_client.get_query_option(name) != value:
            LOG.info('SET %s=%s;' % (name, value))
            self.__beeswax_client.set_query_option(name, value)

    def get_configuration(self):
        """Return the client's get_query_options attribute."""
        # NOTE(review): the attribute is returned without being called -- confirm
        # whether get_query_options is a plain attribute or a method.
        return self.__beeswax_client.get_query_options

    def set_configuration(self, config_option_dict):
        """Replace the current query options with those in config_option_dict.

        All existing options are cleared first, then each name/value pair is set.
        """
        assert config_option_dict is not None, "config_option_dict cannot be None"
        self.clear_configuration()
        # items() exists on both Python 2 and Python 3 dicts; iteritems() is
        # Python 2 only.
        for name, value in config_option_dict.items():
            self.set_configuration_option(name, value)

    def clear_configuration(self):
        """Clear all query options on the wrapped client."""
        self.__beeswax_client.clear_query_options()

    def connect(self):
        """Open the connection of the wrapped client."""
        LOG.info("-- connecting to: %s" % self.__host_port)
        self.__beeswax_client.connect()

    # TODO: rename to close_connection
    def close(self):
        """Close the connection of the wrapped client."""
        LOG.info("-- closing connection to: %s" % self.__host_port)
        self.__beeswax_client.close_connection()

    def close_query(self, operation_handle):
        """Close the query identified by operation_handle."""
        LOG.info("-- closing query for operation handle: %s" %
                 operation_handle)
        self.__beeswax_client.close_query(operation_handle.get_handle())

    def execute(self, sql_stmt):
        """Execute sql_stmt through the client and return the client's result."""
        LOG.info("-- executing against %s\n%s;\n" %
                 (self.__host_port, sql_stmt))
        return self.__beeswax_client.execute(sql_stmt)

    def execute_async(self, sql_stmt):
        """Start sql_stmt asynchronously and return an OperationHandle for it."""
        LOG.info("-- executing async: %s\n%s;\n" %
                 (self.__host_port, sql_stmt))
        return OperationHandle(
            self.__beeswax_client.execute_query_async(sql_stmt))

    def cancel(self, operation_handle):
        """Cancel the query identified by operation_handle."""
        LOG.info("-- canceling operation: %s" % operation_handle)
        return self.__beeswax_client.cancel_query(
            operation_handle.get_handle())

    def get_state(self, operation_handle):
        """Return the client's state for the given operation."""
        LOG.info("-- getting state for operation: %s" % operation_handle)
        return self.__beeswax_client.get_state(operation_handle.get_handle())

    def get_runtime_profile(self, operation_handle):
        """Return the client's runtime profile for the given operation."""
        LOG.info("-- getting runtime profile operation: %s" % operation_handle)
        return self.__beeswax_client.get_runtime_profile(
            operation_handle.get_handle())

    def get_log(self, operation_handle):
        """Return the client's log for the given operation."""
        LOG.info("-- getting log for operation: %s" % operation_handle)
        return self.__beeswax_client.get_log(operation_handle.get_handle())

    def refresh(self):
        """Invalidate the Impalad catalog"""
        return self.execute("invalidate metadata")

    def invalidate_table(self, table_name):
        """Invalidate a specific table from the catalog"""
        return self.execute("invalidate metadata %s" % (table_name))

    def refresh_table(self, db_name, table_name):
        """Refresh a specific table from the catalog"""
        return self.execute("refresh %s.%s" % (db_name, table_name))

    def fetch(self, sql_stmt, operation_handle, max_rows=-1):
        """Fetch results for the operation; max_rows is passed to the client."""
        LOG.info("-- fetching results from: %s" % operation_handle)
        return self.__beeswax_client.fetch_results(
            sql_stmt, operation_handle.get_handle(), max_rows)
 def __init__(self, host_port, use_kerberos=False, user=None, password=None,
              use_ssl=False):
   """Build the wrapped ImpalaBeeswaxClient for host_port.

   Only constructs the client and records host_port; connect() is not called here.
   QUERY_STATES is taken from the client's query_states attribute.
   """
   auth_kwargs = dict(user=user, password=password, use_ssl=use_ssl)
   wrapped_client = ImpalaBeeswaxClient(host_port, use_kerberos, **auth_kwargs)
   self.__beeswax_client = wrapped_client
   self.__host_port = host_port
   self.QUERY_STATES = wrapped_client.query_states
class BeeswaxConnection(ImpalaConnection):
  """ImpalaConnection subclass that forwards every call to an ImpalaBeeswaxClient.

  Each method logs what it is about to do and then delegates to the wrapped client.
  Operation handle arguments are unwrapped with get_handle() before being passed on.
  """

  def __init__(self, host_port, use_kerberos=False, user=None, password=None,
               use_ssl=False):
    """Create the wrapped client for host_port; connect() is not called here."""
    self.__beeswax_client = ImpalaBeeswaxClient(host_port, use_kerberos, user=user,
                                                password=password, use_ssl=use_ssl)
    self.__host_port = host_port
    self.QUERY_STATES = self.__beeswax_client.query_states

  def set_configuration_option(self, name, value):
    """Set query option name to value, skipping the call if it already matches."""
    # Only set the option if it's not already set to the same value.
    if self.__beeswax_client.get_query_option(name) != value:
      LOG.info('SET %s=%s;' % (name, value))
      self.__beeswax_client.set_query_option(name, value)

  def get_configuration(self):
    """Return the client's get_query_options attribute."""
    # NOTE(review): the attribute is returned without being called -- confirm whether
    # get_query_options is a plain attribute or a method.
    return self.__beeswax_client.get_query_options

  def set_configuration(self, config_option_dict):
    """Replace the current query options with those in config_option_dict.

    All existing options are cleared first, then each name/value pair is set.
    """
    assert config_option_dict is not None, "config_option_dict cannot be None"
    self.clear_configuration()
    # items() exists on both Python 2 and Python 3 dicts; iteritems() is Python 2 only.
    for name, value in config_option_dict.items():
      self.set_configuration_option(name, value)

  def clear_configuration(self):
    """Clear all query options on the wrapped client."""
    self.__beeswax_client.clear_query_options()

  def connect(self):
    """Open the connection of the wrapped client."""
    LOG.info("-- connecting to: %s" % self.__host_port)
    self.__beeswax_client.connect()

  # TODO: rename to close_connection
  def close(self):
    """Close the connection of the wrapped client."""
    LOG.info("-- closing connection to: %s" % self.__host_port)
    self.__beeswax_client.close_connection()

  def close_query(self, operation_handle):
    """Close the query identified by operation_handle."""
    LOG.info("-- closing query for operation handle: %s" % operation_handle)
    self.__beeswax_client.close_query(operation_handle.get_handle())

  def execute(self, sql_stmt):
    """Execute sql_stmt through the client and return the client's result."""
    LOG.info("-- executing against %s\n%s;\n" % (self.__host_port, sql_stmt))
    return self.__beeswax_client.execute(sql_stmt)

  def execute_async(self, sql_stmt):
    """Start sql_stmt asynchronously and return an OperationHandle for it."""
    LOG.info("-- executing async: %s\n%s;\n" % (self.__host_port, sql_stmt))
    return OperationHandle(self.__beeswax_client.execute_query_async(sql_stmt))

  def cancel(self, operation_handle):
    """Cancel the query identified by operation_handle."""
    LOG.info("-- canceling operation: %s" % operation_handle)
    return self.__beeswax_client.cancel_query(operation_handle.get_handle())

  def get_state(self, operation_handle):
    """Return the client's state for the given operation."""
    LOG.info("-- getting state for operation: %s" % operation_handle)
    return self.__beeswax_client.get_state(operation_handle.get_handle())

  def get_runtime_profile(self, operation_handle):
    """Return the client's runtime profile for the given operation."""
    LOG.info("-- getting runtime profile operation: %s" % operation_handle)
    return self.__beeswax_client.get_runtime_profile(operation_handle.get_handle())

  def get_log(self, operation_handle):
    """Return the client's log for the given operation."""
    LOG.info("-- getting log for operation: %s" % operation_handle)
    return self.__beeswax_client.get_log(operation_handle.get_handle())

  def refresh(self):
    """Invalidate the Impalad catalog"""
    return self.execute("invalidate metadata")

  def invalidate_table(self, table_name):
    """Invalidate a specific table from the catalog"""
    return self.execute("invalidate metadata %s" % (table_name))

  def refresh_table(self, db_name, table_name):
    """Refresh a specific table from the catalog"""
    return self.execute("refresh %s.%s" % (db_name, table_name))

  def fetch(self, sql_stmt, operation_handle, max_rows = -1):
    """Fetch results for the operation; max_rows is passed through to the client."""
    LOG.info("-- fetching results from: %s" % operation_handle)
    return self.__beeswax_client.fetch_results(
        sql_stmt, operation_handle.get_handle(), max_rows)
Exemple #14
0
 def __init__(self, host_port, use_kerberos=False):
     """Build the wrapped ImpalaBeeswaxClient for host_port.

     Only constructs the client and records host_port; connect() is not called here.
     QUERY_STATES is taken from the client's query_states attribute.
     """
     wrapped_client = ImpalaBeeswaxClient(host_port, use_kerberos)
     self.__beeswax_client = wrapped_client
     self.__host_port = host_port
     self.QUERY_STATES = wrapped_client.query_states
 def __init__(self, host_port, use_kerberos=False):
   """Build the wrapped ImpalaBeeswaxClient for host_port.

   Only constructs the client and records host_port; connect() is not called here.
   QUERY_STATES is taken from the client's query_states attribute.
   """
   wrapped_client = ImpalaBeeswaxClient(host_port, use_kerberos)
   self.__beeswax_client = wrapped_client
   self.__host_port = host_port
   self.QUERY_STATES = wrapped_client.query_states
  # NOTE(review): this is the tail of a larger routine; `parser` and the options
  # read below as options.impalad / options.continue_on_error are defined before
  # this fragment -- confirm against the full file.
  # Boolean flags describing how to reach the cluster.
  parser.add_option("--use_kerberos", action="store_true", default=False,
                    help="Compute stats on a kerberized cluster.")
  parser.add_option("--use_ssl", action="store_true", default=False,
                    help="Compute stats on a cluster with SSL enabled.")
  # Optional comma-separated filters; both default to None (no filter given).
  parser.add_option("--db_names", dest="db_names", default=None,
                    help="Comma-separated list of database names for which to compute "\
                    "stats. Can be used in conjunction with the --table_names flag. "\
                    "If not specified, compute stats will run on tables from all "\
                    "databases.")
  parser.add_option("--table_names", dest="table_names", default=None,
                    help="Comma-separated list of table names to compute stats over. A"\
                    " substring comparison is done. If no tables are specified stats "\
                    "are computed across all tables.")
  options, args = parser.parse_args()
  # Split the comma-separated table filter into lower-cased, whitespace-stripped
  # names; it stays None when the flag was not given.
  table_names = None
  if options.table_names is not None:
    table_names = [name.lower().strip() for name in options.table_names.split(',')]

  # Same normalisation for the database filter.
  db_names = None
  if options.db_names is not None:
    db_names = [name.lower().strip() for name in options.db_names.split(',')]

  # Connect before entering the try block: if connect() raises there is nothing
  # to close yet.
  impala_client = ImpalaBeeswaxClient(options.impalad, use_kerberos=options.use_kerberos,
      use_ssl=options.use_ssl)
  impala_client.connect()
  try:
    compute_stats(impala_client, db_names=db_names,
        table_names=table_names, continue_on_error=options.continue_on_error)
  finally:
    # Always release the connection, whether compute_stats succeeded or raised.
    impala_client.close_connection()
 def client_factory():
   """Yield a connected ImpalaBeeswaxClient and close it afterwards.

   The client is built from options.impalad, options.use_kerberos and
   options.use_ssl, and connected before it is yielded.
   """
   impala_client = ImpalaBeeswaxClient(options.impalad,
       use_kerberos=options.use_kerberos, use_ssl=options.use_ssl)
   impala_client.connect()
   try:
     yield impala_client
   finally:
     # Runs even when the consumer raises at the yield point or closes the
     # generator early, so the connection is not leaked.
     impala_client.close_connection()