Esempio n. 1
0
def max_partition(
        table, schema="default", field=None, filter=None,
        metastore_conn_id='metastore_default'):
    '''
    Gets the max partition for a table.

    :param table: The hive table you are interested in, supports the dot
        notation as in "my_database.my_table", if a dot is found,
        the schema param is disregarded
    :type table: string
    :param schema: The hive schema the table lives in
    :type schema: string
    :param field: the field to get the max value from. If there's only
        one partition field, this will be inferred
    :param filter: filter on a subset of partition as in
        `sub_part='specific_value'`
    :type filter: string
    :param metastore_conn_id: The hive metastore connection you are
        interested in. If your default is set you don't need to use
        this parameter.
    :type metastore_conn_id: string
    :return: whatever ``HiveMetastoreHook.max_partition`` returns for the
        resolved schema and table
    :raises ValueError: if ``table`` contains more than one dot

    >>> max_partition('airflow.static_babynames_partitioned')
    '2015-01-01'
    '''
    if '.' in table:
        parts = table.split('.')
        # Only "schema.table" is supported; reject anything with more
        # dots explicitly instead of failing on tuple unpacking.
        if len(parts) != 2:
            raise ValueError(
                "table must be given as 'table' or 'schema.table', "
                "got {!r}".format(table))
        schema, table = parts
    # Import kept local to the function and placed after validation, so a
    # malformed name fails before the hook is loaded.
    from airflow.hooks import HiveMetastoreHook
    hh = HiveMetastoreHook(metastore_conn_id=metastore_conn_id)
    return hh.max_partition(
        schema=schema, table_name=table, field=field, filter=filter)
Esempio n. 2
0
def max_partition(table,
                  schema="default",
                  field=None,
                  filter=None,
                  metastore_conn_id='metastore_default'):
    '''
    Gets the max partition for a table.

    :param table: The hive table you are interested in, supports the dot
        notation as in "my_database.my_table", if a dot is found,
        the schema param is disregarded
    :type table: string
    :param schema: The hive schema the table lives in
    :type schema: string
    :param field: the field to get the max value from. If there's only
        one partition field, this will be inferred
    :param filter: filter on a subset of partition as in
        `sub_part='specific_value'`
    :type filter: string
    :param metastore_conn_id: The hive metastore connection you are
        interested in. If your default is set you don't need to use
        this parameter.
    :type metastore_conn_id: string
    :return: whatever ``HiveMetastoreHook.max_partition`` returns for the
        resolved schema and table
    :raises ValueError: if ``table`` contains more than one dot

    >>> max_partition('airflow.static_babynames_partitioned')
    '2015-01-01'
    '''
    if '.' in table:
        # Dot notation must be exactly "schema.table"; fail with a clear
        # message rather than an unpacking error on extra dots.
        if table.count('.') > 1:
            raise ValueError(
                "table must be given as 'table' or 'schema.table', "
                "got {!r}".format(table))
        schema, table = table.split('.')
    # Import kept local to the function and placed after validation, so a
    # malformed name fails before the hook is loaded.
    from airflow.hooks import HiveMetastoreHook
    hh = HiveMetastoreHook(metastore_conn_id=metastore_conn_id)
    return hh.max_partition(schema=schema,
                            table_name=table,
                            field=field,
                            filter=filter)