def test_partitions():
    """Build a job from a graph with a static ``PartitionedConfig`` and
    verify the partitions and per-partition run config it exposes."""

    @op(config_schema={"date": str})
    def my_op(_):
        pass

    @graph
    def my_graph():
        my_op()

    def config_fn(partition: Partition):
        # The partition value is forwarded verbatim as the op's "date" config.
        return {"ops": {"my_op": {"config": {"date": partition.value}}}}

    static_partitions = StaticPartitionsDefinition(
        [Partition("2020-02-25"), Partition("2020-02-26")]
    )
    partitioned_config = PartitionedConfig(
        run_config_for_partition_fn=config_fn,
        partitions_def=static_partitions,
    )
    job = my_graph.to_job(config=partitioned_config)

    # The partition set is looked up on the job's first mode under the graph name.
    partition_set = job.mode_definitions[0].get_partition_set_def("my_graph")
    partitions = partition_set.get_partitions()

    assert len(partitions) == 2

    first = partitions[0]
    assert first.value == "2020-02-25"
    assert first.name == "2020-02-25"

    expected_run_config = {"ops": {"my_op": {"config": {"date": "2020-02-25"}}}}
    assert partition_set.run_config_for_partition(first) == expected_run_config
def get_date_range_partitions(current_time=None):
    """Return the list of ``Partition`` objects between ``start`` and an end time.

    ``start``, ``end``, ``timezone``, ``fmt``, ``delta_range``, ``delta_amount``
    and ``inclusive`` are free names resolved from the surrounding scope, which
    is not shown here.

    Args:
        current_time: Optional ``datetime.datetime`` used as the end of the
            range when ``end`` is falsy. When both are falsy, the current time
            in the effective timezone is used.

    Returns:
        One ``Partition`` per step of the range, named with ``strftime(fmt)``.
        The last step is dropped unless ``inclusive`` is truthy.
    """
    check.opt_inst_param(current_time, "current_time", datetime.datetime)

    tz = timezone or "UTC"

    if isinstance(start, PendulumDateTime):
        range_start = to_timezone(start, tz)
    else:
        range_start = pendulum.instance(start, tz=tz)

    # Preference order for the end of the range: explicit end, then the
    # supplied current_time, then "now" in the effective timezone.
    range_end = end or current_time or pendulum.now(tz)

    # Coerce the end to the definition timezone.
    if isinstance(range_end, PendulumDateTime):
        range_end = to_timezone(range_end, tz)
    else:
        range_end = pendulum.instance(range_end, tz=tz)

    date_names = []
    for moment in pendulum.period(range_start, range_end).range(delta_range, delta_amount):
        date_names.append(Partition(value=moment, name=moment.strftime(fmt)))

    # By default the final element is excluded: only fully completed intervals
    # are wanted, and the end time falls in the middle of the interval that the
    # final element represents.
    return date_names if inclusive else date_names[:-1]
def get_schedule_range_partitions():
    """Return ``Partition`` objects for the cron-schedule ticks between ``start`` and ``end``.

    ``start``, ``end``, ``timezone``, ``fmt``, ``cron_schedule`` and
    ``execution_time_to_partition_fn`` are free names resolved from the
    surrounding scope, which is not shown here.

    Returns:
        The collected partitions, named with ``strftime(fmt)``, without the
        last one.
    """
    # Fall back to the local timezone name when none is configured.
    tz = timezone or pendulum.now().timezone.name

    if isinstance(start, pendulum.Pendulum):
        range_start = start.in_tz(tz)
    else:
        range_start = pendulum.instance(start, tz=tz)

    if end:
        if isinstance(end, pendulum.Pendulum):
            range_end = end.in_tz(tz)
        else:
            range_end = pendulum.instance(end, tz=tz)
    else:
        range_end = pendulum.now(tz)

    start_ts = range_start.timestamp()
    end_ts = range_end.timestamp()

    collected = []
    for execution_time in schedule_execution_time_iterator(start_ts, cron_schedule, tz):
        partition_time = execution_time_to_partition_fn(execution_time)
        partition_ts = partition_time.timestamp()

        # Stop as soon as a partition time lies past the end of the range.
        if partition_ts > end_ts:
            break

        # Partition times that map to before the start are skipped.
        if partition_ts >= start_ts:
            collected.append(
                Partition(value=partition_time, name=partition_time.strftime(fmt))
            )

    # NOTE(review): the final partition is always dropped; presumably it is the
    # interval still in progress at the end time — confirm against callers.
    return collected[:-1]
def get_date_range_partitions():
    """Return the list of ``Partition`` objects between ``start`` and ``end``.

    ``start``, ``end``, ``timezone``, ``fmt``, ``delta_range``, ``delta_amount``
    and ``inclusive`` are free names resolved from the surrounding scope, which
    is not shown here.

    Returns:
        One ``Partition`` per step of the range, named with ``strftime(fmt)``.
        The last step is dropped unless ``inclusive`` is truthy.
    """
    # Fall back to the local timezone name when none is configured.
    tz = timezone or pendulum.now().timezone.name

    if isinstance(start, pendulum.Pendulum):
        range_start = start.in_tz(tz)
    else:
        range_start = pendulum.instance(start, tz=tz)

    if end:
        if isinstance(end, pendulum.Pendulum):
            range_end = end.in_tz(tz)
        else:
            range_end = pendulum.instance(end, tz=tz)
    else:
        range_end = pendulum.now(tz)

    date_names = []
    for moment in pendulum.period(range_start, range_end).range(delta_range, delta_amount):
        date_names.append(Partition(value=moment, name=moment.strftime(fmt)))

    # By default the final element is excluded: only fully completed intervals
    # are wanted, and the end time falls in the middle of the interval that the
    # final element represents.
    return date_names if inclusive else date_names[:-1]
def test():
    """Return a one-element list containing ``bare`` turned into a job that
    uses a single static partition ("abc") and an empty run config."""
    partitioned_config = PartitionedConfig(
        partitions_def=StaticPartitionsDefinition([Partition("abc")]),
        # Every partition maps to an empty run config.
        run_config_for_partition_fn=lambda _: {},
    )
    partitioned_job = bare.to_job(resource_defs={}, config=partitioned_config)
    return [partitioned_job]
def get_date_range_partitions():
    """Return ``Partition`` objects stepping from ``start`` by ``delta`` up to an end time.

    ``start``, ``end``, ``fmt`` and ``delta`` are free names resolved from the
    surrounding scope, which is not shown here. When ``end`` is falsy,
    ``datetime.datetime.now()`` is used instead.

    Returns:
        One ``Partition`` per step strictly before the end time, named with
        ``strftime(fmt)``, without the last one.
    """
    stop = end or datetime.datetime.now()

    def _steps():
        # Yield start, start + delta, ... for as long as the value is before stop.
        moment = start
        while moment < stop:
            yield moment
            moment = moment + delta

    date_names = [
        Partition(value=moment, name=moment.strftime(fmt)) for moment in _steps()
    ]

    # The final element is excluded: only fully completed intervals are wanted,
    # and the end time falls in the middle of the interval that the final
    # element represents.
    return date_names[:-1]
def get_schedule_range_partitions(current_time=None):
    """Return ``Partition`` objects for the cron-schedule ticks between ``start`` and an end time.

    ``start``, ``end``, ``timezone``, ``fmt``, ``cron_schedule``, ``inclusive``
    and ``execution_time_to_partition_fn`` are free names resolved from the
    surrounding scope, which is not shown here.

    Args:
        current_time: Optional ``datetime.datetime`` used as the end of the
            range when ``end`` is falsy. When both are falsy, the current time
            in the effective timezone is used.

    Returns:
        The collected partitions, named with ``strftime(fmt)``. The last one is
        dropped unless ``inclusive`` is truthy.
    """
    check.opt_inst_param(current_time, "current_time", datetime.datetime)

    tz = timezone or "UTC"

    if isinstance(start, PendulumDateTime):
        range_start = to_timezone(start, tz)
    else:
        range_start = pendulum.instance(start, tz=tz)

    # Preference order for the end of the range: explicit end, then the
    # supplied current_time, then "now" in the effective timezone.
    range_end = end or current_time or pendulum.now(tz)

    # Coerce the end to the definition timezone.
    if isinstance(range_end, PendulumDateTime):
        range_end = to_timezone(range_end, tz)
    else:
        range_end = pendulum.instance(range_end, tz=tz)

    start_ts = range_start.timestamp()
    end_ts = range_end.timestamp()

    collected = []
    for execution_time in schedule_execution_time_iterator(start_ts, cron_schedule, tz):
        partition_time = execution_time_to_partition_fn(execution_time)
        partition_ts = partition_time.timestamp()

        # Stop as soon as a partition time lies past the end of the range.
        if partition_ts > end_ts:
            break

        # Partition times that map to before the start are skipped.
        if partition_ts >= start_ts:
            collected.append(
                Partition(value=partition_time, name=partition_time.strftime(fmt))
            )

    if inclusive:
        return collected
    return collected[:-1]
start=start, execution_time=time(0, 1), end=end, timezone=timezone, fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE, offset=partition_hours_offset, ) assert_expected_partitions(partitions.get_partitions(), expected_partitions) @pytest.mark.parametrize( argnames=["partition_fn"], argvalues=[ (lambda _current_time: [Partition("a_partition")], ), (lambda _current_time: [Partition(x) for x in range(10)], ), ], ) def test_dynamic_partitions_partitions( partition_fn: Callable[[Optional[datetime]], List[Partition]]): partitions = DynamicPartitionsDefinition(partition_fn) assert [(p.name, p.value) for p in partitions.get_partitions() ] == [(p.name, p.value) for p in partition_fn(None)] assert partitions.get_partition_keys() == [ p.name for p in partition_fn(None) ]