def closest_ds_partition(table, ds, before=True, schema="default", metastore_conn_id='metastore_default'): ''' This function finds the date in a list closest to the target date. An optional parameter can be given to get the closest before or after. :param table: A hive table name :type table: str :param ds: A datestamp ``%Y-%m-%d`` e.g. ``yyyy-mm-dd`` :type ds: datetime.date list :param before: closest before (True), after (False) or either side of ds :type before: bool or None :returns: The closest date :rtype: str or None >>> tbl = 'airflow.static_babynames_partitioned' >>> closest_ds_partition(tbl, '2015-01-02') '2015-01-01' ''' from airflow.hooks import HiveMetastoreHook if '.' in table: schema, table = table.split('.') hh = HiveMetastoreHook(metastore_conn_id=metastore_conn_id) partitions = hh.get_partitions(schema=schema, table_name=table) if not partitions: return None part_vals = [list(p.values())[0] for p in partitions] if ds in part_vals: return ds else: parts = [ datetime.datetime.strptime(pv, '%Y-%m-%d') for pv in part_vals ] target_dt = datetime.datetime.strptime(ds, '%Y-%m-%d') closest_ds = _closest_date(target_dt, parts, before_target=before) return closest_ds.isoformat()
def closest_ds_partition( table, ds, before=True, schema="default", metastore_conn_id='metastore_default'): ''' This function finds the date in a list closest to the target date. An optional paramter can be given to get the closest before or after. :param table: A hive table name :type table: str :param ds: A datestamp ``%Y-%m-%d`` i.e. ``yyyy-mm-dd`` :type ds: datetime.date list :param before: closest before (True), after (False) or either side of ds :type before: bool or None :returns: The closest date :rtype: str or None >>> tbl = 'airflow.static_babynames_partitioned' >>> closest_ds_partition(tbl, '2015-01-02') '2015-01-01' ''' from airflow.hooks import HiveMetastoreHook if '.' in table: schema, table = table.split('.') hh = HiveMetastoreHook(metastore_conn_id=metastore_conn_id) partitions = hh.get_partitions(schema=schema, table_name=table) if not partitions: return None part_vals = [p.values()[0] for p in partitions] if ds in part_vals: return ds else: parts = [datetime.datetime.strptime(pv, '%Y-%m-%d') for pv in part_vals] target_dt = datetime.datetime.strptime(ds, '%Y-%m-%d') closest_ds = _closest_date(target_dt, parts, before_target=before) return closest_ds.isoformat()