예제 #1
0
    def test_invalidate(self):
        """Exercise ``ImpalaDbms.invalidate`` with the metastore helpers patched.

        Scenarios, in order: no Hive metastore interpreter configured, a
        one-table diff, an 11-table diff (rejected), an explicit single-table
        invalidate, and a full invalidate without arguments.
        """
        diff_target = 'impala.dbms.ImpalaDbms._get_different_tables'
        interpreters_target = (
            'desktop.models.ClusterConfig.get_hive_metastore_interpreters')

        with patch(diff_target) as get_different_tables, \
                patch(interpreters_target) as get_hive_metastore_interpreters:
            ddms = ImpalaDbms(Mock(query_server={'server_name': ''}), None)

            def last_hql():
                # HQL text of the most recent query handed to the client.
                return ddms.client.query.call_args[0][0].hql_query

            get_different_tables.return_value = ['customers']

            # No hive/metastore configured
            get_hive_metastore_interpreters.return_value = []
            assert_raises(PopupException, ddms.invalidate, 'default')

            # diff of 1 table
            get_hive_metastore_interpreters.return_value = ['hive']
            ddms.invalidate('default')
            only_query = ddms.client.query.call_args[0][0]
            ddms.client.query.assert_called_once_with(only_query)
            assert_true('customers' in last_hql())

            # diff of 11 tables. Limit is 10.
            get_different_tables.return_value = ['customers'] + [''] * 10
            assert_raises(PopupException, ddms.invalidate, 'default')

            # invalidate 1 table
            ddms.invalidate('default', 'customers')
            assert_true(ddms.client.query.call_count == 2)  # Second call
            assert_true('customers' in last_hql())

            # Full invalidate
            ddms.invalidate()
            assert_true(ddms.client.query.call_count == 3)  # Third call
            assert_true('customers' not in last_hql())
예제 #2
0
def get(user, query_server=None, cluster=None):
  """Return the DBMS wrapper cached for ``user`` and ``query_server``.

  When ``query_server`` is None it is obtained from
  ``get_query_server_config(cluster=cluster)``. Wrappers are stored in
  ``DBMS_CACHE`` keyed by ``user.username`` and then by the server name, and
  are created on first request while ``DBMS_CACHE_LOCK`` is held.
  """
  if query_server is None:
    query_server = get_query_server_config(cluster=cluster)

  DBMS_CACHE_LOCK.acquire()
  try:
    per_user = DBMS_CACHE.setdefault(user.username, {})
    server_name = query_server['server_name']

    if server_name not in per_user:
      # Avoid circular dependency
      from beeswax.server.hive_server2_lib import HiveServerClientCompatible

      if server_name == 'impala':
        from impala.dbms import ImpalaDbms
        from impala.server import ImpalaServerClient
        dbms_class = ImpalaDbms
        client = HiveServerClientCompatible(ImpalaServerClient(query_server, user))
      else:
        from beeswax.server.hive_server2_lib import HiveServerClient
        dbms_class = HiveServer2Dbms
        client = HiveServerClientCompatible(HiveServerClient(query_server, user))

      per_user[server_name] = dbms_class(client, QueryHistory.SERVER_TYPE[1][0])

    return per_user[server_name]
  finally:
    DBMS_CACHE_LOCK.release()
예제 #3
0
  def get_sample(self, database, table, column=None, nested=None, limit=100, generate_sql_only=False):
    """Build a sampling SELECT for ``table`` and either return its text or run it.

    The column is back-quoted when given, otherwise ``*`` is selected.
    Partitioned tables delegate to ``_get_sample_partition_query``; Impala
    servers go through ``ImpalaDbms.get_nested_select``; any other server
    gets a plain ``SELECT ... LIMIT`` statement.

    Returns the SQL string when ``generate_sql_only`` is true. Otherwise it
    returns whatever ``self.fetch`` yields, or None when no statement was
    built or ``execute_and_wait`` produced no handle.
    """
    column = '`%s`' % column if column else '*'

    if table.partition_keys:
      # Filter on max # of partitions for partitioned tables
      hql = self._get_sample_partition_query(database, table, column, limit)
    elif self.server_name != 'impala':
      # TODO: Add nested select support for HS2
      hql = "SELECT %s FROM `%s`.`%s` LIMIT %s;" % (column, database, table.name, limit)
    elif column or nested:
      # NOTE(review): ``column`` is never empty after the quoting above, so
      # this branch is always the one taken for Impala.
      from impala.dbms import ImpalaDbms
      select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
      hql = 'SELECT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)
    else:
      hql = "SELECT * FROM `%s`.`%s` LIMIT %s;" % (database, table.name, limit)

    if not hql:
      return None
    if generate_sql_only:
      return hql

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
      return None

    result = self.fetch(handle, rows=100)
    self.close(handle)
    return result
예제 #4
0
파일: dbms.py 프로젝트: gorden2/hue
  def get_sample(self, database, table, column=None, nested=None, limit=100):
    """Run a sampling SELECT against ``table`` and return the fetched result.

    The column is back-quoted when given, otherwise ``*`` is selected.
    Partitioned tables delegate to ``_get_sample_partition_query``; Impala
    servers go through ``ImpalaDbms.get_nested_select``; any other server
    gets a plain ``SELECT ... LIMIT`` statement.

    Returns whatever ``self.fetch`` yields, or None when no statement was
    built or ``execute_and_wait`` produced no handle.
    """
    column = '`%s`' % column if column else '*'

    if table.partition_keys:
      # Filter on max # of partitions for partitioned tables
      hql = self._get_sample_partition_query(database, table, column, limit)
    elif self.server_name != 'impala':
      # TODO: Add nested select support for HS2
      hql = "SELECT %s FROM `%s`.`%s` LIMIT %s;" % (column, database, table.name, limit)
    elif column or nested:
      # NOTE(review): ``column`` is never empty after the quoting above, so
      # this branch is always the one taken for Impala.
      from impala.dbms import ImpalaDbms
      select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
      hql = 'SELECT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)
    else:
      hql = "SELECT * FROM `%s`.`%s` LIMIT %s;" % (database, table.name, limit)

    if not hql:
      return None

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
      return None

    result = self.fetch(handle, rows=100)
    self.close(handle)
    return result
예제 #5
0
    def get_sample(self, database, table, column=None, nested=None):
        """Run a sampling SELECT against ``table`` and return the fetched result.

        Views are skipped (None is returned). The row limit is the smaller
        of 100 and ``BROWSE_PARTITIONED_TABLE_LIMIT.get()``. A column or
        nested path is only supported on Impala; without one, partitioned
        tables delegate to ``_get_sample_partition_query`` and other tables
        get a plain ``SELECT *``.

        Returns whatever ``self.fetch`` yields, or None when no statement
        was built or ``execute_and_wait`` produced no handle.
        """
        if table.is_view:
            return None

        limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
        hql = None

        if not (column or nested):
            if table.partition_keys:
                # Filter on max # of partitions for partitioned tables
                hql = self._get_sample_partition_query(database, table, limit)
            else:
                hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)
        elif self.server_name == 'impala':
            # Could do column for any type, then nested with partitions
            select_clause, from_clause = ImpalaDbms.get_nested_select(
                database, table.name, column, nested)
            hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)

        if not hql:
            return None

        handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
        if not handle:
            return None

        result = self.fetch(handle, rows=100)
        self.close(handle)
        return result
예제 #6
0
    def get_sample(self, database, table, column=None, nested=None):
        """Run a sampling SELECT (limit 100) against ``table`` and return the result.

        A column or nested path is only supported on Impala; for other
        servers no statement is built and None is returned. Without a column
        or nested path, partitioned tables on non-Impala servers delegate to
        ``_get_sample_partition_query`` and everything else gets a plain
        ``SELECT *``.

        Returns whatever ``self.fetch`` yields, or None when no statement
        was built or ``execute_and_wait`` produced no handle.
        """
        limit = 100
        on_impala = self.server_name == 'impala'

        if column or nested:
            # Could do column for any type, then nested with partitions
            if not on_impala:
                return None
            from impala.dbms import ImpalaDbms
            select_clause, from_clause = ImpalaDbms.get_nested_select(
                database, table.name, column, nested)
            hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
        elif not on_impala and table.partition_keys:
            # Filter on max # of partitions for partitioned tables
            # Impala's SHOW PARTITIONS is different from Hive, so we only support Hive for now
            hql = self._get_sample_partition_query(database, table, limit)
        else:
            hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

        if not hql:
            return None

        handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
        if not handle:
            return None

        result = self.fetch(handle, rows=100)
        self.close(handle)
        return result
예제 #7
0
파일: dbms.py 프로젝트: rhmiller47/hue
  def get_sample(self, database, table, column=None, nested=None):
    """Run a sampling SELECT against ``table`` and return the fetched result.

    Views are skipped (None is returned). The row limit is the smaller of
    100 and ``BROWSE_PARTITIONED_TABLE_LIMIT.get()``. A column or nested path
    is only supported on Impala; without one, partitioned tables delegate to
    ``_get_sample_partition_query`` and other tables get a plain ``SELECT *``.

    Returns whatever ``self.fetch`` yields, or None when no statement was
    built or ``execute_and_wait`` produced no handle.
    """
    if table.is_view:
      return None

    limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
    hql = None

    if not (column or nested):
      if table.partition_keys:
        # Filter on max # of partitions for partitioned tables
        hql = self._get_sample_partition_query(database, table, limit)
      else:
        hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)
    elif self.server_name == 'impala':
      # Could do column for any type, then nested with partitions
      select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
      hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)

    if not hql:
      return None

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
      return None

    result = self.fetch(handle, rows=100)
    self.close(handle)
    return result
예제 #8
0
파일: dbms.py 프로젝트: zhangyan612/hue
  def get_sample(self, database, table, column=None, nested=None):
    """Run a sampling SELECT (limit 100) against ``table`` and return the result.

    A column or nested path is only supported on Impala; for other servers
    no statement is built and None is returned. Without a column or nested
    path, partitioned tables on non-Impala servers delegate to
    ``_get_sample_partition_query`` and everything else gets a plain
    ``SELECT *``.

    Returns whatever ``self.fetch`` yields, or None when no statement was
    built or ``execute_and_wait`` produced no handle.
    """
    limit = 100
    on_impala = self.server_name == 'impala'

    if column or nested:
      # Could do column for any type, then nested with partitions
      if not on_impala:
        return None
      from impala.dbms import ImpalaDbms
      select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
      hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
    elif not on_impala and table.partition_keys:
      # Filter on max # of partitions for partitioned tables
      # Impala's SHOW PARTITIONS is different from Hive, so we only support Hive for now
      hql = self._get_sample_partition_query(database, table, limit)
    else:
      hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

    if not hql:
      return None

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
      return None

    result = self.fetch(handle, rows=100)
    self.close(handle)
    return result
예제 #9
0
파일: dbms.py 프로젝트: hdinsight/hue
  def get_sample(self, database, table, column=None, nested=None):
    """Run a sampling SELECT against ``table`` and return the fetched result.

    Views are skipped (None is returned). The row limit is the smaller of
    100 and ``BROWSE_PARTITIONED_TABLE_LIMIT.get()``. A column or nested path
    is only supported on Impala. Without one, a partitioned table is
    restricted to the first partition returned by ``get_partitions``; if the
    table has no partition yet, the query is issued without a WHERE clause
    instead of failing on an empty list.

    Returns whatever ``self.fetch`` yields, or None when no statement was
    built or ``execute_and_wait`` produced no handle.
    """
    result = None
    hql = None

    if not table.is_view:

      limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())

      if column or nested: # Could do column for any type, then nested with partitions
        if self.server_name == 'impala':
          select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
          hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
      else:
        partition_query = ""
        if table.partition_keys:
          partitions = self.get_partitions(database, table, partition_spec=None, max_parts=1)
          # A partitioned table may not contain any partition yet: only
          # build the filter when there is one to read values from.
          if partitions:
            partition_query = 'WHERE ' + ' AND '.join(["%s='%s'" % (table.partition_keys[idx].name, key) for idx, key in enumerate(partitions[0].values)])
        hql = "SELECT * FROM `%s`.`%s` %s LIMIT %s" % (database, table.name, partition_query, limit)

      if hql:
        query = hql_query(hql)
        handle = self.execute_and_wait(query, timeout_sec=5.0)

        if handle:
          # Always release the handle, even when fetching fails.
          try:
            result = self.fetch(handle, rows=100)
          finally:
            self.close(handle)

    return result
예제 #10
0
 def test_get_impala_nested_select(self):
     """Check the (select, from) clauses built for plain and nested columns."""
     # ((column, nested path), expected (select clause, from clause))
     cases = [
         (('id', None),
          ('id', '`default`.`customers`')),
         (('email_preferences', 'categories/promos/'),
          ('email_preferences.categories.promos', '`default`.`customers`')),
         (('addresses', 'key'),
          ('key', '`default`.`customers`.`addresses`')),
         (('addresses', 'value/street_1/'),
          ('street_1', '`default`.`customers`.`addresses`')),
         (('orders', 'item/order_date'),
          ('order_date', '`default`.`customers`.`orders`')),
         (('orders', 'item/items/item/product_id'),
          ('product_id', '`default`.`customers`.`orders`.`items`')),
     ]

     for (column, nested), expected in cases:
         assert_equal(
             ImpalaDbms.get_nested_select('default', 'customers', column,
                                          nested),
             expected)
예제 #11
0
def get(user, query_server=None, cluster=None):
  """Return the DBMS wrapper cached for ``user`` and ``query_server``.

  When ``query_server`` is None it is obtained from
  ``get_query_server_config(connector=cluster)``. Wrappers are stored in
  ``DBMS_CACHE`` keyed by ``user.id`` and then by the server name, and are
  created on first request while ``DBMS_CACHE_LOCK`` is held. If the entry
  already exists and ``RESET_HS2_QUERY_SERVER`` is set, the flag is cleared,
  the user's cache is emptied and a fresh HiveServer2 wrapper is stored.
  """
  global RESET_HS2_QUERY_SERVER

  if query_server is None:
    query_server = get_query_server_config(connector=cluster)

  DBMS_CACHE_LOCK.acquire()
  try:
    per_user = DBMS_CACHE.setdefault(user.id, {})
    server_name = query_server['server_name']

    if server_name not in per_user:
      # Avoid circular dependency
      from beeswax.server.hive_server2_lib import HiveServerClientCompatible

      if query_server.get('dialect') == 'impala':
        from impala.dbms import ImpalaDbms
        from impala.server import ImpalaServerClient
        dbms_class = ImpalaDbms
        client = HiveServerClientCompatible(ImpalaServerClient(query_server, user))
      elif server_name == 'hms':
        from beeswax.server.hive_metastore_server import HiveMetastoreClient
        dbms_class = HiveServer2Dbms
        client = HiveMetastoreClient(query_server, user)
      else:
        from beeswax.server.hive_server2_lib import HiveServerClient
        dbms_class = HiveServer2Dbms
        client = HiveServerClientCompatible(HiveServerClient(query_server, user))

      per_user[server_name] = dbms_class(client, QueryHistory.SERVER_TYPE[1][0])
    elif RESET_HS2_QUERY_SERVER:
      from beeswax.server.hive_server2_lib import HiveServerClient, HiveServerClientCompatible
      RESET_HS2_QUERY_SERVER = False
      LOG.debug('Setting DBMS cache for the new hs2')
      per_user.clear()
      fresh_client = HiveServerClientCompatible(HiveServerClient(query_server, user))
      per_user[server_name] = HiveServer2Dbms(fresh_client, QueryHistory.SERVER_TYPE[1][0])

    return per_user[server_name]
  finally:
    DBMS_CACHE_LOCK.release()
예제 #12
0
파일: tests.py 프로젝트: 18600597055/hue
 def test_get_impala_nested_select(self):
   """Check the (select, from) clauses built for plain and nested columns."""
   def nested_select(column, nested):
     # Every case targets the same database and table.
     return ImpalaDbms.get_nested_select('default', 'customers', column, nested)

   customers = '`default`.`customers`'

   assert_equal(nested_select('id', None), ('id', customers))
   assert_equal(
     nested_select('email_preferences', 'categories/promos/'),
     ('email_preferences.categories.promos', customers))
   assert_equal(nested_select('addresses', 'key'), ('key', customers + '.`addresses`'))
   assert_equal(nested_select('addresses', 'value/street_1/'), ('street_1', customers + '.`addresses`'))
   assert_equal(nested_select('orders', 'item/order_date'), ('order_date', customers + '.`orders`'))
   assert_equal(
     nested_select('orders', 'item/items/item/product_id'),
     ('product_id', customers + '.`orders`.`items`'))
예제 #13
0
파일: dbms.py 프로젝트: cloudera/hue
  def get_sample(self, database, table, column=None, nested=None, limit=100, generate_sql_only=False, operation=None):
    """Build a sampling SELECT for ``table`` and either return its text or run it.

    The column is back-quoted when given, otherwise ``*`` is selected.
    ``operation`` may be ``'distinct'``, ``'max'`` or ``'min'``; any other
    value produces a plain ``SELECT ... LIMIT``. Partitioned tables delegate
    to ``_get_sample_partition_query``; servers whose name starts with
    ``impala`` go through ``ImpalaDbms.get_nested_select``.

    Returns the SQL string when ``generate_sql_only`` is true. Otherwise it
    returns whatever ``self.fetch`` yields, or None when no statement was
    built or ``execute_and_wait`` produced no handle.
    """
    result = None
    hql = None

    # Filter on max # of partitions for partitioned tables
    column = '`%s`' % column if column else '*'
    if table.partition_keys:
      hql = self._get_sample_partition_query(database, table, column, limit, operation)
    elif self.server_name.startswith('impala'):
      # NOTE(review): ``column`` is never empty after the quoting above, so
      # the ``else`` branch below is currently unreachable.
      if column or nested:
        from impala.dbms import ImpalaDbms
        select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
        if operation == 'distinct':
          hql = 'SELECT DISTINCT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)
        elif operation == 'max':
          hql = 'SELECT max(%s) FROM %s;' % (select_clause, from_clause)
        elif operation == 'min':
          hql = 'SELECT min(%s) FROM %s;' % (select_clause, from_clause)
        else:
          hql = 'SELECT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)
      else:
        hql = "SELECT * FROM `%s`.`%s` LIMIT %s;" % (database, table.name, limit)
    else:
      # TODO: Add nested select support for HS2
      # A single if/elif chain: with independent ``if`` statements the final
      # ``else`` overwrote the 'distinct' and 'max' statements.
      if operation == 'distinct':
        hql = "SELECT DISTINCT %s FROM `%s`.`%s` LIMIT %s;" % (column, database, table.name, limit)
      elif operation == 'max':
        hql = "SELECT max(%s) FROM `%s`.`%s`;" % (column, database, table.name)
      elif operation == 'min':
        hql = "SELECT min(%s) FROM `%s`.`%s`;" % (column, database, table.name)
      else:
        hql = "SELECT %s FROM `%s`.`%s` LIMIT %s;" % (column, database, table.name, limit)

    if hql:
      if generate_sql_only:
        return hql
      else:
        query = hql_query(hql)
        handle = self.execute_and_wait(query, timeout_sec=5.0)

        if handle:
          # Always release the handle, even when fetching fails.
          try:
            result = self.fetch(handle, rows=100)
          finally:
            self.close(handle)

    return result
예제 #14
0
파일: dbms.py 프로젝트: hdinsight/hue
    def get_sample(self, database, table, column=None, nested=None):
        """Run a sampling SELECT against ``table`` and return the fetched result.

        Views are skipped (None is returned). The row limit is the smaller
        of 100 and ``BROWSE_PARTITIONED_TABLE_LIMIT.get()``. A column or
        nested path is only supported on Impala. Without one, a partitioned
        table is restricted to the first partition returned by
        ``get_partitions``; if the table has no partition yet, the query is
        issued without a WHERE clause instead of failing on an empty list.

        Returns whatever ``self.fetch`` yields, or None when no statement
        was built or ``execute_and_wait`` produced no handle.
        """
        result = None
        hql = None

        if not table.is_view:

            limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())

            if column or nested:  # Could do column for any type, then nested with partitions
                if self.server_name == 'impala':
                    select_clause, from_clause = ImpalaDbms.get_nested_select(
                        database, table.name, column, nested)
                    hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause,
                                                          from_clause, limit)
            else:
                partition_query = ""
                if table.partition_keys:
                    partitions = self.get_partitions(database,
                                                     table,
                                                     partition_spec=None,
                                                     max_parts=1)
                    # A partitioned table may not contain any partition yet:
                    # only build the filter when there is one to read from.
                    if partitions:
                        partition_query = 'WHERE ' + ' AND '.join([
                            "%s='%s'" % (table.partition_keys[idx].name, key)
                            for idx, key in enumerate(partitions[0].values)
                        ])
                hql = "SELECT * FROM `%s`.`%s` %s LIMIT %s" % (
                    database, table.name, partition_query, limit)

            if hql:
                query = hql_query(hql)
                handle = self.execute_and_wait(query, timeout_sec=5.0)

                if handle:
                    # Always release the handle, even when fetching fails.
                    try:
                        result = self.fetch(handle, rows=100)
                    finally:
                        self.close(handle)

        return result