def get_partitions(self, db_name, table, max_parts=None):
    """Return partitions of ``table`` as reported by the client.

    ``max_parts`` is capped at ``BROWSE_PARTITIONED_TABLE_LIMIT``; when it is
    omitted, the configured limit itself is used. ``self.use(db_name)`` is
    called before the client request.
    """
    over_cap = max_parts is None or max_parts > BROWSE_PARTITIONED_TABLE_LIMIT.get()
    part_count = BROWSE_PARTITIONED_TABLE_LIMIT.get() if over_cap else max_parts

    # DB name not supported in SHOW PARTITIONS
    self.use(db_name)

    return self.client.get_partitions(db_name, table.name, part_count)
def get_partitions(self, db_name, table, partition_spec=None, max_parts=None, reverse_sort=True):
    """Return partitions of ``table`` as reported by the client.

    ``max_parts`` never exceeds ``BROWSE_PARTITIONED_TABLE_LIMIT``; a missing
    value defaults to that limit. ``partition_spec`` and ``reverse_sort`` are
    forwarded to the client unchanged.
    """
    if max_parts is None:
        part_count = BROWSE_PARTITIONED_TABLE_LIMIT.get()
    elif max_parts > BROWSE_PARTITIONED_TABLE_LIMIT.get():
        part_count = BROWSE_PARTITIONED_TABLE_LIMIT.get()
    else:
        part_count = max_parts

    return self.client.get_partitions(db_name, table.name, partition_spec, part_count, reverse_sort)
def get_sample(self, database, table, column=None, nested=None):
    """Return up to 100 sample rows of ``table``, or None when no sample is taken.

    None is returned without running a query for views, and for
    column/nested requests on any server other than Impala. None is also
    returned when ``execute_and_wait`` yields no handle.

    :param database: name of the database holding the table
    :param table: table object exposing ``is_view``, ``name`` and ``partition_keys``
    :param column: optional column to sample (Impala only)
    :param nested: optional nested path to sample (Impala only)
    """
    if table.is_view:
        return None

    limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
    hql = None

    if column or nested:
        # Could do column for any type, then nested with partitions
        if self.server_name == 'impala':
            select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
            hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
    elif table.partition_keys:
        # Filter on max # of partitions for partitioned tables
        hql = self._get_sample_partition_query(database, table, limit)
    else:
        hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

    if not hql:
        return None

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
        return None

    try:
        return self.fetch(handle, rows=100)
    finally:
        # Always release the handle, even when fetch() raises.
        self.close(handle)
def select_star_from(self, database, table):
    """Execute a ``SELECT *`` statement over ``table`` and return the result
    of ``execute_statement``.

    Partitioned tables use the query built by ``_get_sample_partition_query``
    with a row cap of ``min(100, BROWSE_PARTITIONED_TABLE_LIMIT)``; other
    tables get an unbounded ``SELECT *``.
    """
    if not table.partition_keys:
        return self.execute_statement("SELECT * FROM `%s`.`%s`" % (database, table.name))

    # Filter on max # of partitions for partitioned tables
    row_cap = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
    statement = self._get_sample_partition_query(database, table, row_cap)
    return self.execute_statement(statement)
def get_sample(self, database, table, column=None, nested=None):
    """Return up to 100 sample rows of ``table``, or None when no sample is taken.

    None is returned without running a query for views, and for
    column/nested requests on any server other than Impala. For partitioned
    tables the query is restricted with a WHERE clause built from the first
    partition returned by ``get_partitions``; if no partition is returned the
    table is sampled without a WHERE clause.

    :param database: name of the database holding the table
    :param table: table object exposing ``is_view``, ``name`` and ``partition_keys``
    :param column: optional column to sample (Impala only)
    :param nested: optional nested path to sample (Impala only)
    """
    if table.is_view:
        return None

    limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
    hql = None

    if column or nested:
        # Could do column for any type, then nested with partitions
        if self.server_name == 'impala':
            select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
            hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
    else:
        partition_query = ""
        if table.partition_keys:
            partitions = self.get_partitions(database, table, partition_spec=None, max_parts=1)
            # A partitioned table can come back with no partitions (none
            # created yet, or a browse limit of 0); partitions[0] would raise.
            if partitions:
                partition_query = 'WHERE ' + ' AND '.join([
                    "%s='%s'" % (key.name, value)
                    for key, value in zip(table.partition_keys, partitions[0].values)
                ])
        hql = "SELECT * FROM `%s`.`%s` %s LIMIT %s" % (database, table.name, partition_query, limit)

    if not hql:
        return None

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
        return None

    try:
        return self.fetch(handle, rows=100)
    finally:
        # Always release the handle, even when fetch() raises.
        self.close(handle)
def _get_browse_limit_clause(self, table):
    """Build the LIMIT clause used when browsing a partitioned table.

    Returns ``"LIMIT <n>"`` when the table has partition keys and
    ``BROWSE_PARTITIONED_TABLE_LIMIT`` is positive, otherwise an empty string.
    """
    if not table.partition_keys:
        return ""

    max_rows = BROWSE_PARTITIONED_TABLE_LIMIT.get()
    if max_rows > 0:
        return "LIMIT %d" % (max_rows,)

    return ""
def get_sample(self, database, table, column=None, nested=None):
    """Fetch a sample of at most 100 rows from ``table``.

    Returns None without querying when the table is a view, or when a
    column/nested sample is requested on a server other than Impala. Also
    returns None when ``execute_and_wait`` gives back no handle.

    :param database: name of the database holding the table
    :param table: table object exposing ``is_view``, ``name`` and ``partition_keys``
    :param column: optional column to sample (Impala only)
    :param nested: optional nested path to sample (Impala only)
    """
    if table.is_view:
        return None

    limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())

    if column or nested:
        # Could do column for any type, then nested with partitions
        if self.server_name != 'impala':
            return None
        select_clause, from_clause = ImpalaDbms.get_nested_select(
            database, table.name, column, nested)
        hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
    elif table.partition_keys:
        # Filter on max # of partitions for partitioned tables
        hql = self._get_sample_partition_query(database, table, limit)
    else:
        hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

    # The partition helper is opaque here; keep the original emptiness guard.
    if not hql:
        return None

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
        return None

    try:
        return self.fetch(handle, rows=100)
    finally:
        # Close in ``finally`` so a failing fetch() cannot leak the handle.
        self.close(handle)
def _get_browse_limit_clause(self, table):
    """Return the LIMIT clause for browsing a partitioned table.

    The clause is empty for tables without partition keys and when
    ``BROWSE_PARTITIONED_TABLE_LIMIT`` is not positive.
    """
    clause = ""
    if table.partition_keys:
        cap = BROWSE_PARTITIONED_TABLE_LIMIT.get()
        clause = "LIMIT %d" % (cap, ) if cap > 0 else ""
    return clause
def get_sample(self, database, table):
    """Return sample rows from ``table``; no samples if it's a view (HUE-526).

    Returns None for views and when ``execute_and_wait`` yields no handle.
    The row count is limited in the query to
    ``min(100, BROWSE_PARTITIONED_TABLE_LIMIT)``.

    :param database: name of the database holding the table
    :param table: table object exposing ``is_view`` and ``name``
    """
    if table.is_view:
        return None

    limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
    # Quote database and table separately: one pair of backticks around
    # "db.table" turns the whole string into a single identifier.
    hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
        return None

    try:
        return self.fetch(handle)
    finally:
        # Release the handle whether or not fetch() succeeds.
        self.close(handle)
def get_browse_partition_clause(self, table, partitions):
    """Build the WHERE clause that restricts a read to the first partition.

    Each partition key of ``table`` is matched against the corresponding
    value of ``partitions[0]``; values of ``string``-typed keys are wrapped
    in single quotes, all others are emitted bare. Returns None when
    ``partitions`` is empty or ``BROWSE_PARTITIONED_TABLE_LIMIT`` is not
    positive.
    """
    if not partitions or not BROWSE_PARTITIONED_TABLE_LIMIT.get() > 0:
        return None

    first_values = partitions[0].values
    predicates = [
        ("`%s` = '%s'" if key.type == "string" else "`%s` = %s") % (key.name, value)
        for key, value in zip(table.partition_keys, first_values)
    ]
    return "WHERE " + " AND ".join(predicates)
def get_sample(self, database, table):
    """Return up to 100 sample rows; no samples if it's a view (HUE-526).

    For partitioned tables the query is restricted with a WHERE clause built
    from the first partition returned by ``get_partitions``; if no partition
    is returned the table is sampled without a WHERE clause. Returns None for
    views and when ``execute_and_wait`` yields no handle.

    :param database: name of the database holding the table
    :param table: table object exposing ``is_view``, ``name`` and ``partition_keys``
    """
    if table.is_view:
        return None

    limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())

    partition_query = ""
    if table.partition_keys:
        # Pass max_parts by keyword so the call is correct whether or not
        # get_partitions takes a partition_spec argument before it.
        partitions = self.get_partitions(database, table, max_parts=1)
        # A partitioned table can come back with no partitions (none created
        # yet, or a browse limit of 0); partitions[0] would raise.
        if partitions:
            partition_query = 'WHERE ' + ' AND '.join([
                "%s='%s'" % (key.name, value)
                for key, value in zip(table.partition_keys, partitions[0].values)
            ])

    hql = "SELECT * FROM `%s`.`%s` %s LIMIT %s" % (database, table.name, partition_query, limit)
    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
        return None

    try:
        return self.fetch(handle, rows=100)
    finally:
        # Always release the handle, even when fetch() raises.
        self.close(handle)
def get_sample(self, database, table):
    """Return up to 100 sample rows; no samples if it's a view (HUE-526).

    Partitioned tables are sampled from the first partition returned by
    ``get_partitions`` (expressed as a WHERE clause on the partition keys).
    When no partition is returned the WHERE clause is omitted. Returns None
    for views and when ``execute_and_wait`` yields no handle.

    :param database: name of the database holding the table
    :param table: table object exposing ``is_view``, ``name`` and ``partition_keys``
    """
    if table.is_view:
        return None

    limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
    where_clause = self._first_partition_where(database, table) if table.partition_keys else ""

    hql = "SELECT * FROM `%s`.`%s` %s LIMIT %s" % (database, table.name, where_clause, limit)
    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
        return None

    try:
        return self.fetch(handle, rows=100)
    finally:
        # Close in ``finally`` so a failing fetch() cannot leak the handle.
        self.close(handle)


def _first_partition_where(self, database, table):
    """Return a WHERE clause matching the first partition of ``table``, or ""."""
    partitions = self.get_partitions(database, table, partition_spec=None, max_parts=1)
    # No partitions (none created yet, or a browse limit of 0): nothing to
    # filter on, and partitions[0] would raise IndexError.
    if not partitions:
        return ""
    conditions = [
        "%s='%s'" % (key.name, value)
        for key, value in zip(table.partition_keys, partitions[0].values)
    ]
    return 'WHERE ' + ' AND '.join(conditions)
def get_sample(self, database, table):
    """Return sample rows from ``table``; no samples if it's a view (HUE-526).

    The row limit is ``BROWSE_TABLE_LIMIT`` (100 when that setting is unset
    or zero), capped by ``BROWSE_PARTITIONED_TABLE_LIMIT``. The same limit is
    used in the query and in the fetch. Returns None for views and when
    ``execute_and_wait`` yields no handle.

    :param database: name of the database holding the table
    :param table: table object exposing ``is_view`` and ``name``
    """
    if table.is_view:
        return None

    limit = min(BROWSE_TABLE_LIMIT.get() or 100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
    hql = "SELECT * FROM %s.%s LIMIT %s" % (database, table.name, limit)

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
        return None

    try:
        return self.fetch(handle, rows=limit)
    finally:
        # Always release the handle, even when fetch() raises.
        self.close(handle)
def get_sample(self, database, table, column=None, nested=None):
    """Return up to 100 sample rows of ``table``, or None when no sample is taken.

    Views are never sampled. ``column`` and ``nested`` are accepted but are
    not used by this implementation.

    :param database: name of the database holding the table
    :param table: table object exposing ``is_view``, ``name`` and ``partition_keys``
    :param column: unused
    :param nested: unused
    """
    if table.is_view:
        return None

    limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
    if table.partition_keys:
        # Filter on max # of partitions for partitioned tables
        hql = self._get_sample_partition_query(database, table, limit)
    else:
        hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

    # The partition helper is opaque here; keep the original emptiness guard.
    if not hql:
        return None

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
        return None

    try:
        return self.fetch(handle, rows=100)
    finally:
        # Always release the handle, even when fetch() raises.
        self.close(handle)
def get_sample(self, database, table, column=None, nested=None):
    """Fetch a sample of at most 100 rows from ``table``.

    Returns None without querying when the table is a view, or when a
    column/nested sample is requested on a server other than Impala. For
    partitioned tables the query carries a WHERE clause matching the first
    partition returned by ``get_partitions``; when no partition is returned
    the clause is left out.

    :param database: name of the database holding the table
    :param table: table object exposing ``is_view``, ``name`` and ``partition_keys``
    :param column: optional column to sample (Impala only)
    :param nested: optional nested path to sample (Impala only)
    """
    if table.is_view:
        return None

    limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())

    if column or nested:
        # Could do column for any type, then nested with partitions
        if self.server_name != 'impala':
            return None
        select_clause, from_clause = ImpalaDbms.get_nested_select(
            database, table.name, column, nested)
        hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
    else:
        partition_query = ""
        if table.partition_keys:
            partitions = self.get_partitions(database, table, partition_spec=None, max_parts=1)
            # Guard against an empty result (no partitions created yet, or a
            # browse limit of 0); partitions[0] would raise IndexError.
            if partitions:
                partition_query = 'WHERE ' + ' AND '.join([
                    "%s='%s'" % (key.name, value)
                    for key, value in zip(table.partition_keys, partitions[0].values)
                ])
        hql = "SELECT * FROM `%s`.`%s` %s LIMIT %s" % (
            database, table.name, partition_query, limit)

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=5.0)
    if not handle:
        return None

    try:
        return self.fetch(handle, rows=100)
    finally:
        # Close in ``finally`` so a failing fetch() cannot leak the handle.
        self.close(handle)
def get_partitions(self, db_name, table, max_parts=None):
    """Ask the client for the partitions of ``table``.

    The number requested is ``max_parts`` bounded by
    ``BROWSE_PARTITIONED_TABLE_LIMIT``; with no ``max_parts`` the configured
    limit is requested.
    """
    within_cap = max_parts is not None and not max_parts > BROWSE_PARTITIONED_TABLE_LIMIT.get()
    if not within_cap:
        max_parts = BROWSE_PARTITIONED_TABLE_LIMIT.get()

    table_name = table.name
    return self.client.get_partitions(db_name, table_name, max_parts)
def get_partitions(self, db_name, table, partition_spec=None, max_parts=None, reverse_sort=True):
    """Ask the client for the partitions of ``table``.

    ``partition_spec`` and ``reverse_sort`` go to the client as given. The
    partition count is ``max_parts`` bounded by
    ``BROWSE_PARTITIONED_TABLE_LIMIT``, or that limit when ``max_parts`` is
    not supplied.
    """
    part_count = max_parts
    if part_count is None or part_count > BROWSE_PARTITIONED_TABLE_LIMIT.get():
        # Missing or oversized request: fall back to the configured ceiling.
        part_count = BROWSE_PARTITIONED_TABLE_LIMIT.get()

    return self.client.get_partitions(
        db_name,
        table.name,
        partition_spec,
        part_count,
        reverse_sort,
    )