Exemple #1
0
    def get_partitions(self, db_name, table, partition_spec=None, max_parts=None, reverse_sort=True):
        """
        Fetch the partitions of a table through the underlying client.

        The number of partitions requested is never larger than the configured
        LIST_PARTITIONS_LIMIT: a missing `max_parts`, or one above that limit,
        is replaced by the limit itself.

        :param db_name: name of the database holding the table.
        :param table: table object; only its `name` attribute is used here.
        :param partition_spec: forwarded unchanged to the client.
        :param max_parts: requested maximum number of partitions, or None.
        :param reverse_sort: forwarded unchanged to the client.
        :return: whatever `self.client.get_partitions` returns.
        """
        unbounded = max_parts is None
        if unbounded or max_parts > LIST_PARTITIONS_LIMIT.get():
            effective_limit = LIST_PARTITIONS_LIMIT.get()
        else:
            effective_limit = max_parts

        return self.client.get_partitions(
            db_name,
            table.name,
            partition_spec,
            max_parts=effective_limit,
            reverse_sort=reverse_sort
        )
Exemple #2
0
    def get_partitions(self, db_name, table, partition_spec=None, max_parts=None, reverse_sort=True):
        """
        Delegate partition listing to `self.client`, bounded by LIST_PARTITIONS_LIMIT.

        `max_parts` is honoured only when it is set and does not exceed the
        configured limit; otherwise the configured limit is sent to the client.
        `table` must expose a `name` attribute; `partition_spec` and
        `reverse_sort` are passed through untouched.
        """
        capped = max_parts
        if capped is None or capped > LIST_PARTITIONS_LIMIT.get():
            # No explicit bound, or a bound above the configured ceiling.
            capped = LIST_PARTITIONS_LIMIT.get()

        list_partitions = self.client.get_partitions
        return list_partitions(db_name, table.name, partition_spec, max_parts=capped, reverse_sort=reverse_sort)
Exemple #3
0
    def get_partitions(self, database, table_name, partition_spec=None, max_parts=None, reverse_sort=True):
        """
        List the partitions of `database`.`table_name` with a SHOW PARTITIONS statement.

        :param database: database name, interpolated into the statement.
        :param table_name: table name, interpolated into the statement.
        :param partition_spec: optional text placed inside a PARTITION(...) clause
            to narrow the listing.
        :param max_parts: maximum number of partitions to return; None or a
            non-positive value falls back to LIST_PARTITIONS_LIMIT.
        :param reverse_sort: when true, the fetched partitions are returned in
            reverse order.
        :return: list of PartitionValueCompatible objects, at most `max_parts` long.
        """
        table = self.get_table(database, table_name)

        statement = 'SHOW PARTITIONS `%s`.`%s`' % (database, table_name)
        if partition_spec:
            statement = statement + ' PARTITION(%s)' % partition_spec

        # The statement is run without LIMIT / ORDER BY, so up to 10000 rows are
        # fetched and the reversal and truncation happen here afterwards.
        # partition_spec is the only way to narrow the initial listing.
        result = self.execute_query_statement(statement, max_rows=10000)

        partitions = []
        for row in result.rows():
            partitions.append(PartitionValueCompatible(row, table))

        if reverse_sort:
            partitions = partitions[::-1]

        if max_parts is None or max_parts <= 0:
            limit = LIST_PARTITIONS_LIMIT.get()
        else:
            limit = max_parts

        return partitions[:limit]
Exemple #4
0
  def get_partitions(self, database, table_name, partition_spec=None, max_parts=None, reverse_sort=True):
    """
    List the partitions of `database`.`table_name` with a SHOW PARTITIONS statement.

    For a server whose name starts with 'impala', the result columns listed
    before '#Rows' are treated as partition keys and each row is rewritten into
    the Hive-style 'key1=val1/key2=val2' form. For any other server the rows
    are used as returned.

    :param partition_spec: text for a PARTITION(...) clause; only applied when
        the server name is 'beeswax'.
    :param max_parts: maximum number of partitions to return; None or a
        non-positive value falls back to LIST_PARTITIONS_LIMIT.
    :param reverse_sort: when true, the partitions are returned in reverse order.
    :return: list of PartitionValueCompatible objects, at most `max_parts` long.
    :raises ValueError: if anything fails while deriving the partitions of an
        Impala table.
    """
    table = self.get_table(database, table_name)

    statement = 'SHOW PARTITIONS `%s`.`%s`' % (database, table_name)
    if self.query_server['server_name'] == 'beeswax' and partition_spec:
      statement = statement + ' PARTITION(%s)' % partition_spec

    # The statement is run without LIMIT / ORDER BY, so up to 10000 rows are
    # fetched and the reversal and truncation happen here afterwards.
    result = self.execute_query_statement(
      statement,
      max_rows=10000,
      orientation=TFetchOrientation.FETCH_NEXT,
      close_operation=True
    )

    if not self.query_server['server_name'].startswith('impala'):
      partitions = [PartitionValueCompatible(row, table) for row in result.rows()]
    else:
      try:
        # Partition key columns are the ones listed before the '#Rows' column.
        column_names = [column.name for column in result.cols()]
        key_names = column_names[:column_names.index('#Rows')]
        key_count = len(key_names)

        # Keep only the partition values of every row, then drop the final row (the Total).
        value_rows = [row[:key_count] for row in result.rows()]
        value_rows = value_rows[:-1]

        # Hive format: one single-element list per partition, e.g. ['key1=val1/key2=val2'].
        # Pairs with a falsy key or value are left out.
        formatted = []
        for values in value_rows:
          pairs = izip(key_names, values)
          spec = '/'.join(['%s=%s' % (str(key), str(value)) for key, value in pairs if all((key, value))])
          formatted.append([spec])

        partitions = [PartitionValueCompatible(entry, table) for entry in formatted]
      except Exception:
        raise ValueError(_('Failed to determine partition keys for Impala table: `%s`.`%s`') % (database, table_name))

    if reverse_sort:
      partitions = partitions[::-1]

    if max_parts is None or max_parts <= 0:
      limit = LIST_PARTITIONS_LIMIT.get()
    else:
      limit = max_parts

    return partitions[:limit]
Exemple #5
0
  def get_partitions(self, database, table_name, partition_spec=None, max_parts=None, reverse_sort=True):
    """
    Return the partitions of `database`.`table_name` obtained from SHOW PARTITIONS.

    `partition_spec`, when given, is placed inside a PARTITION(...) clause.
    The fetched rows are wrapped in PartitionValueCompatible, reversed when
    `reverse_sort` is true, and cut down to `max_parts` entries. A `max_parts`
    of None or <= 0 means LIST_PARTITIONS_LIMIT is used instead.
    """
    table = self.get_table(database, table_name)

    query_parts = ['SHOW PARTITIONS `%s`.`%s`' % (database, table_name)]
    if partition_spec:
      query_parts.append(' PARTITION(%s)' % partition_spec)
    query = ''.join(query_parts)

    # The statement is run without LIMIT / ORDER BY: fetch up to 10000 rows,
    # then reverse and truncate below. partition_spec narrows the initial list.
    partition_table = self.execute_query_statement(query, max_rows=10000)

    rows = partition_table.rows()
    partitions = [PartitionValueCompatible(row, table) for row in rows]
    ordered = partitions[::-1] if reverse_sort else partitions

    limit = LIST_PARTITIONS_LIMIT.get() if (max_parts is None or max_parts <= 0) else max_parts
    return ordered[:limit]
Exemple #6
0
        # Format partition key and values into Hive format: [key1=val1/key2=value2]
        for values in partition_values:
          zipped_parts = izip(partition_keys, values)
          partitions_formatted.append(['/'.join(['%s=%s' % (str(part[0]), str(part[1])) for part in zipped_parts if all(part)])])

        partitions = [PartitionValueCompatible(partition, table) for partition in partitions_formatted]
      except Exception, e:
        raise ValueError(_('Failed to determine partition keys for Impala table: `%s`.`%s`') % (database, table_name))
    else:
      partitions = [PartitionValueCompatible(partition, table) for partition in partition_table.rows()]

    if reverse_sort:
      partitions.reverse()

    if max_parts is None or max_parts <= 0:
      max_parts = LIST_PARTITIONS_LIMIT.get()

    return partitions[:max_parts]


  def get_configuration(self):
    configuration = {}

    if self.query_server['server_name'] == 'impala':  # Return all configuration settings
      query = 'SET'
      results = self.execute_query_statement(query, orientation=TFetchOrientation.FETCH_NEXT, close_operation=True)
      configuration = dict((row[0], row[1]) for row in results.rows())
    else:  # For Hive, only return white-listed configurations
      query = 'SET -v'
      results = self.execute_query_statement(query, orientation=TFetchOrientation.FETCH_FIRST, max_rows=-1, close_operation=True)
      config_whitelist = [config.lower() for config in CONFIG_WHITELIST.get()]
Exemple #7
0
        # Format partition key and values into Hive format: [key1=val1/key2=value2]
        for values in partition_values:
          zipped_parts = izip(partition_keys, values)
          partitions_formatted.append(['/'.join(['%s=%s' % (part[0], part[1]) for part in zipped_parts])])

        partitions = [PartitionValueCompatible(partition, table) for partition in partitions_formatted]
      except Exception, e:
        raise ValueError(_('Failed to determine partition keys for Impala table: `%s`.`%s`') % (database, table_name))
    else:
      partitions = [PartitionValueCompatible(partition, table) for partition in partition_table.rows()]

    if reverse_sort:
      partitions.reverse()

    if max_parts is None or max_parts <= 0:
      max_parts = LIST_PARTITIONS_LIMIT.get()

    return partitions[:max_parts]


  def get_configuration(self):
    configuration = {}

    if self.query_server['server_name'] == 'impala':  # Return all configuration settings
      query = 'SET'
      results = self.execute_query_statement(query, orientation=TFetchOrientation.FETCH_NEXT)
      configuration = dict((row[0], row[1]) for row in results.rows())
    else:  # For Hive, only return white-listed configurations
      query = 'SET -v'
      results = self.execute_query_statement(query, orientation=TFetchOrientation.FETCH_FIRST)
      config_whitelist = [config.lower() for config in CONFIG_WHITELIST.get()]