def get_sample(self, database, table, column=None, nested=None):
    """Return sample rows from ``table``, or None when no sample query applies.

    Views are never sampled. The SQL row limit is the smaller of 100 and
    ``BROWSE_PARTITIONED_TABLE_LIMIT``. When ``column`` or ``nested`` is
    given, a query is only built for the 'impala' server; any other server
    yields None.

    :param database: name of the database holding the table
    :param table: table object; ``is_view``, ``name`` and ``partition_keys`` are read
    :param column: optional column to sample
    :param nested: optional nested path inside ``column``
    :return: whatever ``self.fetch`` returns, or None if nothing was executed
    """
    if table.is_view:
        return None

    limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
    hql = None

    if column or nested:
        # Could do column for any type, then nested with partitions
        if self.server_name == 'impala':
            select_clause, from_clause = ImpalaDbms.get_nested_select(
                database, table.name, column, nested)
            hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
    elif table.partition_keys:
        # Filter on max # of partitions for partitioned tables
        hql = self._get_sample_partition_query(database, table, limit)
    else:
        hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

    if not hql:
        return None

    result = None
    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if handle:
        # Always release the handle, even if fetching the rows fails.
        try:
            result = self.fetch(handle, rows=100)
        finally:
            self.close(handle)
    return result
def get_sample(self, database, table, column=None, nested=None, limit=100, generate_sql_only=False):
    """Build, and optionally run, a sample query against ``table``.

    :param database: name of the database holding the table
    :param table: table object; ``name`` and ``partition_keys`` are read
    :param column: optional column to sample; every column when omitted
    :param nested: optional nested path, only used on the 'impala' server
    :param limit: value placed in the SQL ``LIMIT`` clause
    :param generate_sql_only: when true, return the SQL text without running it
    :return: the SQL string if ``generate_sql_only`` is set, otherwise
        whatever ``self.fetch`` returns, or None if nothing was executed
    """
    # Back-quote the requested column, or fall back to all columns.
    column = '`%s`' % column if column else '*'

    if table.partition_keys:
        # Filter on max # of partitions for partitioned tables
        hql = self._get_sample_partition_query(database, table, column, limit)
    elif self.server_name == 'impala':
        # `column` was normalised above and is never empty, so the former
        # "no column and no nested" fallback could not be reached.
        from impala.dbms import ImpalaDbms
        select_clause, from_clause = ImpalaDbms.get_nested_select(
            database, table.name, column, nested)
        hql = 'SELECT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)
    else:
        # TODO: Add nested select support for HS2
        hql = "SELECT %s FROM `%s`.`%s` LIMIT %s;" % (column, database, table.name, limit)

    if not hql:
        return None
    if generate_sql_only:
        return hql

    result = None
    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if handle:
        # Always release the handle, even if fetching the rows fails.
        try:
            result = self.fetch(handle, rows=100)
        finally:
            self.close(handle)
    return result
def get_sample(self, database, table, column=None, nested=None, limit=100):
    """Run a sample query against ``table`` and return the fetched rows.

    :param database: name of the database holding the table
    :param table: table object; ``name`` and ``partition_keys`` are read
    :param column: optional column to sample; every column when omitted
    :param nested: optional nested path, only used on the 'impala' server
    :param limit: value placed in the SQL ``LIMIT`` clause
    :return: whatever ``self.fetch`` returns, or None if no query was built
        or no handle was obtained
    """
    result = None

    # Back-quote the requested column, or fall back to all columns.
    column = '`%s`' % column if column else '*'

    if table.partition_keys:
        # Filter on max # of partitions for partitioned tables
        hql = self._get_sample_partition_query(database, table, column, limit)
    elif self.server_name == 'impala':
        # `column` is never empty at this point, so the nested-select path
        # is the only reachable one for Impala.
        from impala.dbms import ImpalaDbms
        select_clause, from_clause = ImpalaDbms.get_nested_select(
            database, table.name, column, nested)
        hql = 'SELECT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)
    else:
        # TODO: Add nested select support for HS2
        hql = "SELECT %s FROM `%s`.`%s` LIMIT %s;" % (column, database, table.name, limit)

    if hql:
        handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
        if handle:
            # Always release the handle, even if fetching the rows fails.
            try:
                result = self.fetch(handle, rows=100)
            finally:
                self.close(handle)

    return result
def get_sample(self, database, table, column=None, nested=None):
    """Return up to 100 sample rows from ``table``, or None if nothing ran.

    When ``column`` or ``nested`` is given, a query is only built for the
    'impala' server; any other server yields None.

    :param database: name of the database holding the table
    :param table: table object; ``name`` and ``partition_keys`` are read
    :param column: optional column to sample
    :param nested: optional nested path inside ``column``
    :return: whatever ``self.fetch`` returns, or None
    """
    result = None
    hql = None
    limit = 100

    if column or nested:
        # Could do column for any type, then nested with partitions
        if self.server_name == 'impala':
            from impala.dbms import ImpalaDbms
            select_clause, from_clause = ImpalaDbms.get_nested_select(
                database, table.name, column, nested)
            hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
    elif self.server_name != 'impala' and table.partition_keys:
        # Filter on max # of partitions for partitioned tables
        # Impala's SHOW PARTITIONS is different from Hive, so we only support Hive for now
        hql = self._get_sample_partition_query(database, table, limit)
    else:
        hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

    if hql:
        handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
        if handle:
            # Always release the handle, even if fetching the rows fails.
            try:
                result = self.fetch(handle, rows=100)
            finally:
                self.close(handle)

    return result
def get_sample(self, database, table, column=None, nested=None):
    """Sample rows from a non-view ``table``.

    No query is issued for views. The SQL row limit is
    ``min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())``. Column or nested
    sampling only produces a query when ``self.server_name`` is 'impala'.

    :param database: name of the database holding the table
    :param table: table object; ``is_view``, ``name`` and ``partition_keys`` are read
    :param column: optional column to sample
    :param nested: optional nested path inside ``column``
    :return: whatever ``self.fetch`` returns, or None if nothing was executed
    """
    result = None
    hql = None

    if not table.is_view:
        limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())

        if column or nested:
            # Could do column for any type, then nested with partitions
            if self.server_name == 'impala':
                select_clause, from_clause = ImpalaDbms.get_nested_select(
                    database, table.name, column, nested)
                hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
        elif table.partition_keys:
            # Filter on max # of partitions for partitioned tables
            hql = self._get_sample_partition_query(database, table, limit)
        else:
            hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

    if hql:
        query = hql_query(hql)
        handle = self.execute_and_wait(query, timeout_sec=5.0)
        if handle:
            # Close the handle on every exit path, including a failed fetch.
            try:
                result = self.fetch(handle, rows=100)
            finally:
                self.close(handle)

    return result
def get_sample(self, database, table, column=None, nested=None):
    """Fetch a 100-row sample of ``table``.

    Column or nested sampling is only turned into a query on the 'impala'
    server. Without a column, non-Impala partitioned tables use
    ``self._get_sample_partition_query``; everything else is a plain
    ``SELECT *``.

    :param database: name of the database holding the table
    :param table: table object; ``name`` and ``partition_keys`` are read
    :param column: optional column to sample
    :param nested: optional nested path inside ``column``
    :return: whatever ``self.fetch`` returns, or None if nothing was executed
    """
    limit = 100
    hql = None

    if column or nested:
        # Could do column for any type, then nested with partitions
        if self.server_name == 'impala':
            from impala.dbms import ImpalaDbms
            select_clause, from_clause = ImpalaDbms.get_nested_select(
                database, table.name, column, nested)
            hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
    else:
        # Filter on max # of partitions for partitioned tables
        # Impala's SHOW PARTITIONS is different from Hive, so we only support Hive for now
        if self.server_name != 'impala' and table.partition_keys:
            hql = self._get_sample_partition_query(database, table, limit)
        else:
            hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

    if not hql:
        return None

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
        return None

    # Close the handle on every exit path, including a failed fetch.
    try:
        return self.fetch(handle, rows=100)
    finally:
        self.close(handle)
def get_sample(self, database, table, column=None, nested=None):
    """Return sample rows from ``table``, or None when no sample query applies.

    Views are never sampled. For a partitioned table the query is restricted
    to the first partition returned by ``self.get_partitions``; if that call
    returns nothing, the query runs without a ``WHERE`` clause rather than
    failing on an empty list.

    :param database: name of the database holding the table
    :param table: table object; ``is_view``, ``name`` and ``partition_keys`` are read
    :param column: optional column to sample (only queried on 'impala')
    :param nested: optional nested path inside ``column``
    :return: whatever ``self.fetch`` returns, or None if nothing was executed
    """
    result = None
    hql = None

    if not table.is_view:
        limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())

        if column or nested:
            # Could do column for any type, then nested with partitions
            if self.server_name == 'impala':
                select_clause, from_clause = ImpalaDbms.get_nested_select(
                    database, table.name, column, nested)
                hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
        else:
            partition_query = ""
            if table.partition_keys:
                partitions = self.get_partitions(database, table, partition_spec=None, max_parts=1)
                # A partitioned table may have no partitions yet.
                if partitions:
                    partition_query = 'WHERE ' + ' AND '.join(
                        "%s='%s'" % (part_key.name, value)
                        for part_key, value in zip(table.partition_keys, partitions[0].values))
            hql = "SELECT * FROM `%s`.`%s` %s LIMIT %s" % (
                database, table.name, partition_query, limit)

    if hql:
        query = hql_query(hql)
        handle = self.execute_and_wait(query, timeout_sec=5.0)
        if handle:
            # Always release the handle, even if fetching the rows fails.
            try:
                result = self.fetch(handle, rows=100)
            finally:
                self.close(handle)

    return result
def test_get_impala_nested_select(self):
    """Check ``ImpalaDbms.get_nested_select`` against known column/nested paths."""
    # Each case: (column, nested path, expected (select clause, from clause)).
    cases = [
        ('id', None,
         ('id', '`default`.`customers`')),
        ('email_preferences', 'categories/promos/',
         ('email_preferences.categories.promos', '`default`.`customers`')),
        ('addresses', 'key',
         ('key', '`default`.`customers`.`addresses`')),
        ('addresses', 'value/street_1/',
         ('street_1', '`default`.`customers`.`addresses`')),
        ('orders', 'item/order_date',
         ('order_date', '`default`.`customers`.`orders`')),
        ('orders', 'item/items/item/product_id',
         ('product_id', '`default`.`customers`.`orders`.`items`')),
    ]

    for column, nested, expected in cases:
        actual = ImpalaDbms.get_nested_select('default', 'customers', column, nested)
        assert_equal(actual, expected)
def test_get_impala_nested_select(self):
    """Exercise ``ImpalaDbms.get_nested_select`` for flat, struct, map and array paths."""
    nested_select = ImpalaDbms.get_nested_select

    # Plain column, no nested path.
    flat = nested_select('default', 'customers', 'id', None)
    assert_equal(flat, ('id', '`default`.`customers`'))

    # Path without key/value/item tokens stays in the select clause.
    struct_field = nested_select('default', 'customers', 'email_preferences', 'categories/promos/')
    assert_equal(struct_field, ('email_preferences.categories.promos', '`default`.`customers`'))

    # 'key' / 'value' tokens move the column into the from clause.
    map_key = nested_select('default', 'customers', 'addresses', 'key')
    assert_equal(map_key, ('key', '`default`.`customers`.`addresses`'))

    map_value_field = nested_select('default', 'customers', 'addresses', 'value/street_1/')
    assert_equal(map_value_field, ('street_1', '`default`.`customers`.`addresses`'))

    # 'item' tokens behave the same way, including when repeated.
    array_field = nested_select('default', 'customers', 'orders', 'item/order_date')
    assert_equal(array_field, ('order_date', '`default`.`customers`.`orders`'))

    nested_array_field = nested_select('default', 'customers', 'orders', 'item/items/item/product_id')
    assert_equal(nested_array_field, ('product_id', '`default`.`customers`.`orders`.`items`'))
def get_sample(self, database, table, column=None, nested=None, limit=100, generate_sql_only=False, operation=None):
    """Build, and optionally run, a sample or aggregate query against ``table``.

    :param database: name of the database holding the table
    :param table: table object; ``name`` and ``partition_keys`` are read
    :param column: optional column to sample; every column when omitted
    :param nested: optional nested path, only used on Impala servers
    :param limit: value placed in the SQL ``LIMIT`` clause (not used for
        the 'max' / 'min' queries built here)
    :param generate_sql_only: when true, return the SQL text without running it
    :param operation: 'distinct', 'max' or 'min' to change the query shape;
        any other value gives a plain ``SELECT``
    :return: the SQL string if ``generate_sql_only`` is set, otherwise
        whatever ``self.fetch`` returns, or None if nothing was executed
    """
    # Back-quote the requested column, or fall back to all columns.
    column = '`%s`' % column if column else '*'

    if table.partition_keys:
        # Filter on max # of partitions for partitioned tables
        hql = self._get_sample_partition_query(database, table, column, limit, operation)
    elif self.server_name.startswith('impala'):
        # `column` is never empty at this point, so the nested-select path
        # is the only reachable one for Impala.
        from impala.dbms import ImpalaDbms
        select_clause, from_clause = ImpalaDbms.get_nested_select(
            database, table.name, column, nested)
        if operation == 'distinct':
            hql = 'SELECT DISTINCT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)
        elif operation == 'max':
            hql = 'SELECT max(%s) FROM %s;' % (select_clause, from_clause)
        elif operation == 'min':
            hql = 'SELECT min(%s) FROM %s;' % (select_clause, from_clause)
        else:
            hql = 'SELECT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)
    else:
        # TODO: Add nested select support for HS2
        # This must be one if/elif chain: with independent `if`s the final
        # `else` overwrote the 'distinct' and 'max' queries.
        if operation == 'distinct':
            hql = "SELECT DISTINCT %s FROM `%s`.`%s` LIMIT %s;" % (column, database, table.name, limit)
        elif operation == 'max':
            hql = "SELECT max(%s) FROM `%s`.`%s`;" % (column, database, table.name)
        elif operation == 'min':
            hql = "SELECT min(%s) FROM `%s`.`%s`;" % (column, database, table.name)
        else:
            hql = "SELECT %s FROM `%s`.`%s` LIMIT %s;" % (column, database, table.name, limit)

    if not hql:
        return None
    if generate_sql_only:
        return hql

    result = None
    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if handle:
        # Always release the handle, even if fetching the rows fails.
        try:
            result = self.fetch(handle, rows=100)
        finally:
            self.close(handle)
    return result
def get_sample(self, database, table, column=None, nested=None):
    """Sample rows from a non-view ``table``, limited to one partition if partitioned.

    No query is issued for views. Column or nested sampling only produces a
    query when ``self.server_name`` is 'impala'. For partitioned tables the
    first partition returned by ``self.get_partitions`` supplies the
    ``WHERE`` filter; when no partition is returned the filter is omitted
    instead of raising ``IndexError``.

    :param database: name of the database holding the table
    :param table: table object; ``is_view``, ``name`` and ``partition_keys`` are read
    :param column: optional column to sample
    :param nested: optional nested path inside ``column``
    :return: whatever ``self.fetch`` returns, or None if nothing was executed
    """
    if table.is_view:
        return None

    limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
    hql = None

    if column or nested:
        # Could do column for any type, then nested with partitions
        if self.server_name == 'impala':
            select_clause, from_clause = ImpalaDbms.get_nested_select(
                database, table.name, column, nested)
            hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
    else:
        partition_query = ""
        if table.partition_keys:
            partitions = self.get_partitions(database, table, partition_spec=None, max_parts=1)
            # Guard against a partitioned table that has no partitions yet.
            if partitions:
                conditions = [
                    "%s='%s'" % (part_key.name, value)
                    for part_key, value in zip(table.partition_keys, partitions[0].values)
                ]
                partition_query = 'WHERE ' + ' AND '.join(conditions)
        hql = "SELECT * FROM `%s`.`%s` %s LIMIT %s" % (
            database, table.name, partition_query, limit)

    if not hql:
        return None

    result = None
    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if handle:
        # Close the handle on every exit path, including a failed fetch.
        try:
            result = self.fetch(handle, rows=100)
        finally:
            self.close(handle)
    return result