예제 #1
0
    def get_sample(self, database, table, column=None, nested=None):
        """Fetch a small sample of rows from ``table``.

        Views are never sampled. When ``column`` or ``nested`` is given, a
        query is only built for the ``impala`` server; any other server runs
        nothing. Without them, partitioned tables are sampled through
        ``_get_sample_partition_query`` and all other tables through a plain
        ``SELECT *``.

        :param database: name of the database holding the table.
        :param table: table object; ``is_view``, ``name`` and
            ``partition_keys`` are read from it.
        :param column: optional column to sample (Impala only).
        :param nested: optional nested path under ``column`` (Impala only).
        :returns: the value returned by ``self.fetch``, or ``None`` when no
            query was built or ``execute_and_wait`` gave no handle.
        """
        if table.is_view:
            return None

        limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
        hql = None

        if column or nested:  # Could do column for any type, then nested with partitions
            if self.server_name == 'impala':
                select_clause, from_clause = ImpalaDbms.get_nested_select(
                    database, table.name, column, nested)
                hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause,
                                                      from_clause, limit)
        elif table.partition_keys:
            # Filter on max # of partitions for partitioned tables
            hql = self._get_sample_partition_query(database, table, limit)
        else:
            hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (
                database, table.name, limit)

        if not hql:
            return None

        handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
        if not handle:
            return None

        # Close the handle even when fetching raises, so it is not left open.
        try:
            return self.fetch(handle, rows=100)
        finally:
            self.close(handle)
예제 #2
0
  def get_sample(self, database, table, column=None, nested=None, limit=100, generate_sql_only=False):
    """Build a sample query for ``table`` and, unless told otherwise, run it.

    Partitioned tables are sampled through ``_get_sample_partition_query``.
    Otherwise the ``impala`` server goes through
    ``ImpalaDbms.get_nested_select`` and every other server gets a plain
    ``SELECT`` of the (back-quoted) column, or ``*`` when no column is given.

    :param database: name of the database holding the table.
    :param table: table object; ``name`` and ``partition_keys`` are read.
    :param column: optional column name; it is wrapped in backticks.
    :param nested: optional nested path, only used for Impala.
    :param limit: value placed in the ``LIMIT`` clause. ``self.fetch`` is
        still always called with ``rows=100``.
    :param generate_sql_only: when true, return the query text without
        executing it.
    :returns: the query string when ``generate_sql_only`` is true; otherwise
        the value returned by ``self.fetch``, or ``None`` when no query was
        built or ``execute_and_wait`` gave no handle.
    """
    # Quote the requested column, or fall back to all columns.
    column = '`%s`' % column if column else '*'

    if table.partition_keys:
      # Filter on max # of partitions for partitioned tables
      hql = self._get_sample_partition_query(database, table, column, limit)
    elif self.server_name == 'impala':
      # ``column`` is never empty here (it is at least '*'), so the former
      # "no column and no nested" fallback could not be reached and is gone.
      from impala.dbms import ImpalaDbms
      select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
      hql = 'SELECT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)
    else:
      # TODO: Add nested select support for HS2
      hql = "SELECT %s FROM `%s`.`%s` LIMIT %s;" % (column, database, table.name, limit)

    if not hql:
      return None
    if generate_sql_only:
      return hql

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
      return None

    # Close the handle even when fetching raises, so it is not left open.
    try:
      return self.fetch(handle, rows=100)
    finally:
      self.close(handle)
예제 #3
0
파일: dbms.py 프로젝트: gorden2/hue
  def get_sample(self, database, table, column=None, nested=None, limit=100):
    """Run a sample query against ``table`` and return the fetched rows.

    Partitioned tables are sampled through ``_get_sample_partition_query``.
    Otherwise the ``impala`` server goes through
    ``ImpalaDbms.get_nested_select`` and every other server gets a plain
    ``SELECT`` of the (back-quoted) column, or ``*`` when no column is given.

    :param database: name of the database holding the table.
    :param table: table object; ``name`` and ``partition_keys`` are read.
    :param column: optional column name; it is wrapped in backticks.
    :param nested: optional nested path, only used for Impala.
    :param limit: value placed in the ``LIMIT`` clause. ``self.fetch`` is
        still always called with ``rows=100``.
    :returns: the value returned by ``self.fetch``, or ``None`` when no query
        was built or ``execute_and_wait`` gave no handle.
    """
    # Quote the requested column, or fall back to all columns.
    column = '`%s`' % column if column else '*'

    if table.partition_keys:
      # Filter on max # of partitions for partitioned tables
      hql = self._get_sample_partition_query(database, table, column, limit)
    elif self.server_name == 'impala':
      # ``column`` is never empty here (it is at least '*'), so the former
      # "no column and no nested" fallback could not be reached and is gone.
      from impala.dbms import ImpalaDbms
      select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
      hql = 'SELECT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)
    else:
      # TODO: Add nested select support for HS2
      hql = "SELECT %s FROM `%s`.`%s` LIMIT %s;" % (column, database, table.name, limit)

    if not hql:
      return None

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
      return None

    # Close the handle even when fetching raises, so it is not left open.
    try:
      return self.fetch(handle, rows=100)
    finally:
      self.close(handle)
예제 #4
0
    def get_sample(self, database, table, column=None, nested=None):
        """Fetch up to 100 sample rows from ``table``.

        When ``column`` or ``nested`` is given, a query is only built for the
        ``impala`` server; any other server runs nothing. Without them,
        partitioned tables on non-Impala servers are sampled through
        ``_get_sample_partition_query`` and everything else through a plain
        ``SELECT *``.

        :param database: name of the database holding the table.
        :param table: table object; ``name`` and ``partition_keys`` are read.
        :param column: optional column to sample (Impala only).
        :param nested: optional nested path under ``column`` (Impala only).
        :returns: the value returned by ``self.fetch``, or ``None`` when no
            query was built or ``execute_and_wait`` gave no handle.
        """
        limit = 100
        hql = None

        if column or nested:  # Could do column for any type, then nested with partitions
            if self.server_name == 'impala':
                from impala.dbms import ImpalaDbms
                select_clause, from_clause = ImpalaDbms.get_nested_select(
                    database, table.name, column, nested)
                hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause,
                                                      from_clause, limit)
        elif self.server_name != 'impala' and table.partition_keys:
            # Filter on max # of partitions for partitioned tables
            # Impala's SHOW PARTITIONS is different from Hive, so we only support Hive for now
            hql = self._get_sample_partition_query(database, table, limit)
        else:
            hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database,
                                                        table.name, limit)

        if not hql:
            return None

        handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
        if not handle:
            return None

        # Close the handle even when fetching raises, so it is not left open.
        try:
            return self.fetch(handle, rows=100)
        finally:
            self.close(handle)
예제 #5
0
파일: dbms.py 프로젝트: rhmiller47/hue
  def get_sample(self, database, table, column=None, nested=None):
    """Fetch a small sample of rows from ``table``.

    Views are never sampled. When ``column`` or ``nested`` is given, a query
    is only built for the ``impala`` server; any other server runs nothing.
    Without them, partitioned tables are sampled through
    ``_get_sample_partition_query`` and all other tables through a plain
    ``SELECT *``.

    :param database: name of the database holding the table.
    :param table: table object; ``is_view``, ``name`` and ``partition_keys``
        are read from it.
    :param column: optional column to sample (Impala only).
    :param nested: optional nested path under ``column`` (Impala only).
    :returns: the value returned by ``self.fetch``, or ``None`` when no query
        was built or ``execute_and_wait`` gave no handle.
    """
    if table.is_view:
      return None

    limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
    hql = None

    if column or nested: # Could do column for any type, then nested with partitions
      if self.server_name == 'impala':
        select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
        hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
    elif table.partition_keys:
      # Filter on max # of partitions for partitioned tables
      hql = self._get_sample_partition_query(database, table, limit)
    else:
      hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

    if not hql:
      return None

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
      return None

    # Close the handle even when fetching raises, so it is not left open.
    try:
      return self.fetch(handle, rows=100)
    finally:
      self.close(handle)
예제 #6
0
파일: dbms.py 프로젝트: zhangyan612/hue
  def get_sample(self, database, table, column=None, nested=None):
    """Fetch up to 100 sample rows from ``table``.

    When ``column`` or ``nested`` is given, a query is only built for the
    ``impala`` server; any other server runs nothing. Without them,
    partitioned tables on non-Impala servers are sampled through
    ``_get_sample_partition_query`` and everything else through a plain
    ``SELECT *``.

    :param database: name of the database holding the table.
    :param table: table object; ``name`` and ``partition_keys`` are read.
    :param column: optional column to sample (Impala only).
    :param nested: optional nested path under ``column`` (Impala only).
    :returns: the value returned by ``self.fetch``, or ``None`` when no query
        was built or ``execute_and_wait`` gave no handle.
    """
    limit = 100
    hql = None

    if column or nested: # Could do column for any type, then nested with partitions
      if self.server_name == 'impala':
        from impala.dbms import ImpalaDbms
        select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
        hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
    elif self.server_name != 'impala' and table.partition_keys:
      # Filter on max # of partitions for partitioned tables
      # Impala's SHOW PARTITIONS is different from Hive, so we only support Hive for now
      hql = self._get_sample_partition_query(database, table, limit)
    else:
      hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

    if not hql:
      return None

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
      return None

    # Close the handle even when fetching raises, so it is not left open.
    try:
      return self.fetch(handle, rows=100)
    finally:
      self.close(handle)
예제 #7
0
파일: dbms.py 프로젝트: hdinsight/hue
  def get_sample(self, database, table, column=None, nested=None):
    """Fetch a small sample of rows from ``table``.

    Views are never sampled. When ``column`` or ``nested`` is given, a query
    is only built for the ``impala`` server; any other server runs nothing.
    Without them, a ``SELECT *`` is issued; for partitioned tables it is
    restricted to the first partition returned by ``get_partitions``.

    :param database: name of the database holding the table.
    :param table: table object; ``is_view``, ``name`` and ``partition_keys``
        are read from it.
    :param column: optional column to sample (Impala only).
    :param nested: optional nested path under ``column`` (Impala only).
    :returns: the value returned by ``self.fetch``, or ``None`` when no query
        was built or ``execute_and_wait`` gave no handle.
    """
    if table.is_view:
      return None

    limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
    hql = None

    if column or nested: # Could do column for any type, then nested with partitions
      if self.server_name == 'impala':
        select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
        hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
    else:
      partition_query = ""
      if table.partition_keys:
        partitions = self.get_partitions(database, table, partition_spec=None, max_parts=1)
        # A partitioned table may not have any partition yet; indexing
        # partitions[0] would then fail, so only filter when one exists.
        if partitions:
          # NOTE(review): partition values are interpolated without escaping.
          partition_query = 'WHERE ' + ' AND '.join(["%s='%s'" % (table.partition_keys[idx].name, key) for idx, key in enumerate(partitions[0].values)])
      hql = "SELECT * FROM `%s`.`%s` %s LIMIT %s" % (database, table.name, partition_query, limit)

    if not hql:
      return None

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
      return None

    # Close the handle even when fetching raises, so it is not left open.
    try:
      return self.fetch(handle, rows=100)
    finally:
      self.close(handle)
예제 #8
0
 def test_get_impala_nested_select(self):
     """Check the (select, from) clause pairs built by ImpalaDbms.get_nested_select."""
     database, table = 'default', 'customers'

     # Each case: (column, nested path, expected (select_clause, from_clause)).
     cases = [
         ('id', None,
          ('id', '`default`.`customers`')),
         ('email_preferences', 'categories/promos/',
          ('email_preferences.categories.promos', '`default`.`customers`')),
         ('addresses', 'key',
          ('key', '`default`.`customers`.`addresses`')),
         ('addresses', 'value/street_1/',
          ('street_1', '`default`.`customers`.`addresses`')),
         ('orders', 'item/order_date',
          ('order_date', '`default`.`customers`.`orders`')),
         ('orders', 'item/items/item/product_id',
          ('product_id', '`default`.`customers`.`orders`.`items`')),
     ]

     for column, nested, expected in cases:
         actual = ImpalaDbms.get_nested_select(database, table, column, nested)
         assert_equal(actual, expected)
예제 #9
0
파일: tests.py 프로젝트: 18600597055/hue
 def test_get_impala_nested_select(self):
   """Verify ImpalaDbms.get_nested_select for plain, struct, map and array columns."""
   # Pairs of (positional arguments, expected (select_clause, from_clause)).
   expectations = (
     (('default', 'customers', 'id', None),
      ('id', '`default`.`customers`')),
     (('default', 'customers', 'email_preferences', 'categories/promos/'),
      ('email_preferences.categories.promos', '`default`.`customers`')),
     (('default', 'customers', 'addresses', 'key'),
      ('key', '`default`.`customers`.`addresses`')),
     (('default', 'customers', 'addresses', 'value/street_1/'),
      ('street_1', '`default`.`customers`.`addresses`')),
     (('default', 'customers', 'orders', 'item/order_date'),
      ('order_date', '`default`.`customers`.`orders`')),
     (('default', 'customers', 'orders', 'item/items/item/product_id'),
      ('product_id', '`default`.`customers`.`orders`.`items`')),
   )

   for call_args, wanted in expectations:
     assert_equal(ImpalaDbms.get_nested_select(*call_args), wanted)
예제 #10
0
파일: dbms.py 프로젝트: cloudera/hue
  def get_sample(self, database, table, column=None, nested=None, limit=100, generate_sql_only=False, operation=None):
    """Build a sample query for ``table`` and, unless told otherwise, run it.

    Partitioned tables are delegated to ``_get_sample_partition_query``.
    Otherwise the select/from clauses come from
    ``ImpalaDbms.get_nested_select`` when the server name starts with
    ``impala``, and from the back-quoted column (or ``*``) and
    ``database``.``table`` for every other server.

    :param database: name of the database holding the table.
    :param table: table object; ``name`` and ``partition_keys`` are read.
    :param column: optional column name; it is wrapped in backticks.
    :param nested: optional nested path, only used for Impala.
    :param limit: value placed in the ``LIMIT`` clause (not used for
        ``max``/``min``). ``self.fetch`` is still always called with
        ``rows=100``.
    :param generate_sql_only: when true, return the query text without
        executing it.
    :param operation: ``'distinct'``, ``'max'`` or ``'min'``; any other value
        produces a plain ``SELECT ... LIMIT``.
    :returns: the query string when ``generate_sql_only`` is true; otherwise
        the value returned by ``self.fetch``, or ``None`` when no query was
        built or ``execute_and_wait`` gave no handle.
    """
    # Quote the requested column, or fall back to all columns.
    column = '`%s`' % column if column else '*'

    if table.partition_keys:
      # Filter on max # of partitions for partitioned tables
      hql = self._get_sample_partition_query(database, table, column, limit, operation)
    else:
      if self.server_name.startswith('impala'):
        # ``column`` is never empty here (it is at least '*'), so the former
        # "no column and no nested" fallback could not be reached and is gone.
        from impala.dbms import ImpalaDbms
        select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
      else:
        # TODO: Add nested select support for HS2
        select_clause, from_clause = column, '`%s`.`%s`' % (database, table.name)

      # One exclusive chain: previously the non-Impala branch used separate
      # ``if`` statements, so 'distinct' and 'max' were overwritten by the
      # plain SELECT attached to the ``min`` test.
      if operation == 'distinct':
        hql = 'SELECT DISTINCT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)
      elif operation == 'max':
        hql = 'SELECT max(%s) FROM %s;' % (select_clause, from_clause)
      elif operation == 'min':
        hql = 'SELECT min(%s) FROM %s;' % (select_clause, from_clause)
      else:
        hql = 'SELECT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)

    if not hql:
      return None
    if generate_sql_only:
      return hql

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
      return None

    # Close the handle even when fetching raises, so it is not left open.
    try:
      return self.fetch(handle, rows=100)
    finally:
      self.close(handle)
예제 #11
0
파일: dbms.py 프로젝트: hdinsight/hue
    def get_sample(self, database, table, column=None, nested=None):
        """Fetch a small sample of rows from ``table``.

        Views are never sampled. When ``column`` or ``nested`` is given, a
        query is only built for the ``impala`` server; any other server runs
        nothing. Without them, a ``SELECT *`` is issued; for partitioned
        tables it is restricted to the first partition returned by
        ``get_partitions``.

        :param database: name of the database holding the table.
        :param table: table object; ``is_view``, ``name`` and
            ``partition_keys`` are read from it.
        :param column: optional column to sample (Impala only).
        :param nested: optional nested path under ``column`` (Impala only).
        :returns: the value returned by ``self.fetch``, or ``None`` when no
            query was built or ``execute_and_wait`` gave no handle.
        """
        if table.is_view:
            return None

        limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
        hql = None

        if column or nested:  # Could do column for any type, then nested with partitions
            if self.server_name == 'impala':
                select_clause, from_clause = ImpalaDbms.get_nested_select(
                    database, table.name, column, nested)
                hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause,
                                                      from_clause, limit)
        else:
            partition_query = ""
            if table.partition_keys:
                partitions = self.get_partitions(database,
                                                 table,
                                                 partition_spec=None,
                                                 max_parts=1)
                # A partitioned table may not have any partition yet; indexing
                # partitions[0] would then fail, so only filter when one exists.
                if partitions:
                    # NOTE(review): partition values are interpolated without escaping.
                    partition_query = 'WHERE ' + ' AND '.join([
                        "%s='%s'" % (table.partition_keys[idx].name, key)
                        for idx, key in enumerate(partitions[0].values)
                    ])
            hql = "SELECT * FROM `%s`.`%s` %s LIMIT %s" % (
                database, table.name, partition_query, limit)

        if not hql:
            return None

        handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
        if not handle:
            return None

        # Close the handle even when fetching raises, so it is not left open.
        try:
            return self.fetch(handle, rows=100)
        finally:
            self.close(handle)