Esempio n. 1
0
def closest_ds_partition(table,
                         ds,
                         before=True,
                         schema="default",
                         metastore_conn_id='metastore_default'):
    '''
    This function finds the date in a list closest to the target date.
    An optional parameter can be given to get the closest before or after.

    :param table: A hive table name
    :type table: str
    :param ds: A datestamp ``%Y-%m-%d`` e.g. ``yyyy-mm-dd``
    :type ds: datetime.date list
    :param before: closest before (True), after (False) or either side of ds
    :type before: bool or None
    :returns: The closest date
    :rtype: str or None

    >>> tbl = 'airflow.static_babynames_partitioned'
    >>> closest_ds_partition(tbl, '2015-01-02')
    '2015-01-01'
    '''
    from airflow.hooks import HiveMetastoreHook
    if '.' in table:
        schema, table = table.split('.')
    hh = HiveMetastoreHook(metastore_conn_id=metastore_conn_id)
    partitions = hh.get_partitions(schema=schema, table_name=table)
    if not partitions:
        return None
    part_vals = [list(p.values())[0] for p in partitions]
    if ds in part_vals:
        return ds
    else:
        parts = [
            datetime.datetime.strptime(pv, '%Y-%m-%d') for pv in part_vals
        ]
        target_dt = datetime.datetime.strptime(ds, '%Y-%m-%d')
        closest_ds = _closest_date(target_dt, parts, before_target=before)
        return closest_ds.isoformat()
Esempio n. 2
0
def closest_ds_partition(
        table, ds, before=True, schema="default",
        metastore_conn_id='metastore_default'):
    '''
    This function finds the date in a list closest to the target date.
    An optional paramter can be given to get the closest before or after.

    :param table: A hive table name
    :type table: str
    :param ds: A datestamp ``%Y-%m-%d`` i.e. ``yyyy-mm-dd``
    :type ds: datetime.date list
    :param before: closest before (True), after (False) or either side of ds
    :type before: bool or None
    :returns: The closest date
    :rtype: str or None

    >>> tbl = 'airflow.static_babynames_partitioned'
    >>> closest_ds_partition(tbl, '2015-01-02')
    '2015-01-01'
    '''
    from airflow.hooks import HiveMetastoreHook
    if '.' in table:
        schema, table = table.split('.')
    hh = HiveMetastoreHook(metastore_conn_id=metastore_conn_id)
    partitions = hh.get_partitions(schema=schema, table_name=table)
    if not partitions:
        return None
    part_vals = [p.values()[0] for p in partitions]
    if ds in part_vals:
        return ds
    else:
        parts = [datetime.datetime.strptime(pv, '%Y-%m-%d')
                 for pv in part_vals]
        target_dt = datetime.datetime.strptime(ds, '%Y-%m-%d')
        closest_ds = _closest_date(target_dt, parts, before_target=before)
        return closest_ds.isoformat()