def get_partitions(self, db_name, table, partition_spec=None, max_parts=None, reverse_sort=True):
    """Return the partitions of ``table`` as reported by the underlying client.

    ``max_parts`` is replaced by the configured ``LIST_PARTITIONS_LIMIT`` when
    it is missing or larger than that limit, so the client is never asked for
    more partitions than the configuration allows.

    Args:
        db_name: Name of the database, forwarded to the client.
        table: Table object; only its ``name`` attribute is used here.
        partition_spec: Optional partition filter, forwarded unchanged.
        max_parts: Requested maximum number of partitions, or None.
        reverse_sort: Forwarded unchanged to the client.

    Returns:
        Whatever ``self.client.get_partitions`` returns.
    """
    # No explicit request, or a request above the configured ceiling, both
    # fall back to the ceiling itself.
    over_limit = max_parts is None or max_parts > LIST_PARTITIONS_LIMIT.get()
    effective_max = LIST_PARTITIONS_LIMIT.get() if over_limit else max_parts

    options = {'max_parts': effective_max, 'reverse_sort': reverse_sort}
    return self.client.get_partitions(db_name, table.name, partition_spec, **options)
def get_partitions(self, db_name, table, partition_spec=None, max_parts=None, reverse_sort=True):
    """Fetch partitions for ``table`` through ``self.client``.

    The number of partitions requested is capped: when ``max_parts`` is None
    or exceeds ``LIST_PARTITIONS_LIMIT``, the configured limit is used.

    Args:
        db_name: Database name passed to the client.
        table: Table object whose ``name`` is passed to the client.
        partition_spec: Optional partition filter passed through as-is.
        max_parts: Upper bound on the number of partitions, or None.
        reverse_sort: Passed through as-is to the client.

    Returns:
        The result of ``self.client.get_partitions``.
    """
    if max_parts is not None:
        exceeds_cap = max_parts > LIST_PARTITIONS_LIMIT.get()
    else:
        exceeds_cap = True

    if exceeds_cap:
        max_parts = LIST_PARTITIONS_LIMIT.get()

    return self.client.get_partitions(
        db_name,
        table.name,
        partition_spec,
        max_parts=max_parts,
        reverse_sort=reverse_sort,
    )
def get_partitions(self, database, table_name, partition_spec=None, max_parts=None, reverse_sort=True):
    """List the partitions of a table by running ``SHOW PARTITIONS``.

    Args:
        database: Database name, interpolated into the statement.
        table_name: Table name, interpolated into the statement.
        partition_spec: Optional text placed inside a ``PARTITION(...)`` clause.
        max_parts: Maximum number of partitions to return; None or a value
            <= 0 falls back to ``LIST_PARTITIONS_LIMIT``.
        reverse_sort: When true, the fetched rows are returned in reverse order.

    Returns:
        A list of ``PartitionValueCompatible`` objects, at most ``max_parts`` long.
    """
    table = self.get_table(database, table_name)

    pieces = ['SHOW PARTITIONS `%s`.`%s`' % (database, table_name)]
    if partition_spec:
        pieces.append(' PARTITION(%s)' % partition_spec)
    statement = ''.join(pieces)

    # SHOW PARTITIONS has no LIMIT / ORDER BY, so a large batch is fetched
    # first and then reversed and truncated locally. A partition_spec is the
    # only way to narrow that initial batch.
    fetched = self.execute_query_statement(statement, max_rows=10000)

    partitions = []
    for row in fetched.rows():
        partitions.append(PartitionValueCompatible(row, table))

    if reverse_sort:
        partitions.reverse()

    use_default_limit = max_parts is None or max_parts <= 0
    limit = LIST_PARTITIONS_LIMIT.get() if use_default_limit else max_parts
    return partitions[:limit]
def get_partitions(self, database, table_name, partition_spec=None, max_parts=None, reverse_sort=True):
    """List the partitions of a table by running ``SHOW PARTITIONS``.

    For a server named ``beeswax`` the optional ``partition_spec`` is added as
    a ``PARTITION(...)`` clause. For servers whose name starts with ``impala``
    the result is reshaped into the Hive-style ``key1=val1/key2=val2`` form
    before being wrapped.

    Args:
        database: Database name, interpolated into the statement.
        table_name: Table name, interpolated into the statement.
        partition_spec: Optional text for the ``PARTITION(...)`` clause; only
            used when the server name is ``beeswax``.
        max_parts: Maximum number of partitions to return; None or a value
            <= 0 falls back to ``LIST_PARTITIONS_LIMIT``.
        reverse_sort: When true, the partitions are returned in reverse order.

    Returns:
        A list of ``PartitionValueCompatible`` objects, at most ``max_parts`` long.

    Raises:
        ValueError: If anything fails while reshaping an Impala result.
    """
    table = self.get_table(database, table_name)
    server_name = self.query_server['server_name']

    query = 'SHOW PARTITIONS `%s`.`%s`' % (database, table_name)
    if server_name == 'beeswax' and partition_spec:
        query += ' PARTITION(%s)' % partition_spec

    # SHOW PARTITIONS offers no LIMIT / ORDER BY, so a large batch is fetched
    # and then reversed and truncated locally; partition_spec is the only way
    # to narrow that initial batch.
    partition_table = self.execute_query_statement(
        query,
        max_rows=10000,
        orientation=TFetchOrientation.FETCH_NEXT,
        close_operation=True
    )

    if server_name.startswith('impala'):
        try:
            # The partition key columns are the ones listed before '#Rows'.
            cols = [col.name for col in partition_table.cols()]
            partition_keys = cols[:cols.index('#Rows')]
            num_parts = len(partition_keys)

            # Keep only the partition key values of each row, then drop the
            # last row, which is the Total.
            partition_values = [row[:num_parts] for row in partition_table.rows()][:-1]

            # Format keys and values into Hive format: [key1=val1/key2=value2].
            # Pairs with an empty key or value are left out. Builtin zip is
            # used so this does not depend on the Python 2-only izip.
            partitions_formatted = [
                ['/'.join(['%s=%s' % (str(key), str(value))
                           for key, value in zip(partition_keys, values)
                           if key and value])]
                for values in partition_values
            ]

            partitions = [PartitionValueCompatible(partition, table) for partition in partitions_formatted]
        except Exception:
            # Any failure while reshaping the Impala result is reported to
            # the caller as a single ValueError.
            raise ValueError(_('Failed to determine partition keys for Impala table: `%s`.`%s`') % (database, table_name))
    else:
        partitions = [PartitionValueCompatible(partition, table) for partition in partition_table.rows()]

    if reverse_sort:
        partitions.reverse()

    if max_parts is None or max_parts <= 0:
        max_parts = LIST_PARTITIONS_LIMIT.get()

    return partitions[:max_parts]
def get_partitions(self, database, table_name, partition_spec=None, max_parts=None, reverse_sort=True):
    """Run ``SHOW PARTITIONS`` for a table and return the wrapped rows.

    Args:
        database: Database name, interpolated into the statement.
        table_name: Table name, interpolated into the statement.
        partition_spec: Optional text placed inside a ``PARTITION(...)`` clause.
        max_parts: Maximum number of partitions to return; None or a value
            <= 0 means ``LIST_PARTITIONS_LIMIT`` is used instead.
        reverse_sort: When true, rows are returned in reverse fetch order.

    Returns:
        A list of ``PartitionValueCompatible`` objects, truncated to the limit.
    """
    table = self.get_table(database, table_name)

    show_stmt = 'SHOW PARTITIONS `%s`.`%s`' % (database, table_name)
    if partition_spec:
        show_stmt = show_stmt + ' PARTITION(%s)' % partition_spec

    # There is no LIMIT or ORDER BY on SHOW PARTITIONS: fetch a big batch,
    # then reverse and cut it down here. Use partition_spec to shrink the
    # batch that is fetched in the first place.
    result = self.execute_query_statement(show_stmt, max_rows=10000)
    wrapped = [PartitionValueCompatible(row, table) for row in result.rows()]

    if reverse_sort:
        wrapped = list(reversed(wrapped))

    if max_parts is None or max_parts <= 0:
        return wrapped[:LIST_PARTITIONS_LIMIT.get()]
    return wrapped[:max_parts]
# Format partition key and values into Hive format: [key1=val1/key2=value2] for values in partition_values: zipped_parts = izip(partition_keys, values) partitions_formatted.append(['/'.join(['%s=%s' % (str(part[0]), str(part[1])) for part in zipped_parts if all(part)])]) partitions = [PartitionValueCompatible(partition, table) for partition in partitions_formatted] except Exception, e: raise ValueError(_('Failed to determine partition keys for Impala table: `%s`.`%s`') % (database, table_name)) else: partitions = [PartitionValueCompatible(partition, table) for partition in partition_table.rows()] if reverse_sort: partitions.reverse() if max_parts is None or max_parts <= 0: max_parts = LIST_PARTITIONS_LIMIT.get() return partitions[:max_parts] def get_configuration(self): configuration = {} if self.query_server['server_name'] == 'impala': # Return all configuration settings query = 'SET' results = self.execute_query_statement(query, orientation=TFetchOrientation.FETCH_NEXT, close_operation=True) configuration = dict((row[0], row[1]) for row in results.rows()) else: # For Hive, only return white-listed configurations query = 'SET -v' results = self.execute_query_statement(query, orientation=TFetchOrientation.FETCH_FIRST, max_rows=-1, close_operation=True) config_whitelist = [config.lower() for config in CONFIG_WHITELIST.get()]
# Format partition key and values into Hive format: [key1=val1/key2=value2] for values in partition_values: zipped_parts = izip(partition_keys, values) partitions_formatted.append(['/'.join(['%s=%s' % (part[0], part[1]) for part in zipped_parts])]) partitions = [PartitionValueCompatible(partition, table) for partition in partitions_formatted] except Exception, e: raise ValueError(_('Failed to determine partition keys for Impala table: `%s`.`%s`') % (database, table_name)) else: partitions = [PartitionValueCompatible(partition, table) for partition in partition_table.rows()] if reverse_sort: partitions.reverse() if max_parts is None or max_parts <= 0: max_parts = LIST_PARTITIONS_LIMIT.get() return partitions[:max_parts] def get_configuration(self): configuration = {} if self.query_server['server_name'] == 'impala': # Return all configuration settings query = 'SET' results = self.execute_query_statement(query, orientation=TFetchOrientation.FETCH_NEXT) configuration = dict((row[0], row[1]) for row in results.rows()) else: # For Hive, only return white-listed configurations query = 'SET -v' results = self.execute_query_statement(query, orientation=TFetchOrientation.FETCH_FIRST) config_whitelist = [config.lower() for config in CONFIG_WHITELIST.get()]