def test_paginated_fetch(self, storage):
    """Exercise cursor/limit pagination of ``storage.get_runs``.

    The same expectations are checked three times: without a filter, with a
    pipeline-name filter, and with a tag filter.
    """
    assert storage
    one, two, three = [make_new_run_id() for _ in range(3)]

    for new_run_id in (one, two, three):
        storage.add_run(
            TestRunStorage.build_run(
                run_id=new_run_id, pipeline_name="some_pipeline", tags={"mytag": "hello"}
            )
        )

    def _assert_pagination(make_filter_args):
        # All three runs match; paging with cursor=three and limit=1 must
        # yield exactly the run `two`.
        everything = storage.get_runs(*make_filter_args())
        assert len(everything) == 3
        page = storage.get_runs(*make_filter_args(), cursor=three, limit=1)
        assert len(page) == 1
        assert page[0].run_id == two

    _assert_pagination(lambda: ())
    _assert_pagination(lambda: (PipelineRunsFilter(pipeline_name="some_pipeline"),))
    _assert_pagination(lambda: (PipelineRunsFilter(tags={"mytag": "hello"}),))
def test_run_priority_pipeline():
    """Check that a later, higher-priority pipeline finishes before an earlier, lower-priority one.

    Two pipelines are submitted from daemon threads before any celery worker is
    started. Once both threads signal completion, the run stats are compared:
    the low-priority run must have started first and ended last.
    """
    with seven.TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir)

        # Each event is handed to `execute_on_thread`; presumably it is set when
        # the corresponding pipeline execution returns -- TODO confirm.
        low_done = threading.Event()
        hi_done = threading.Event()

        # enqueue low-priority tasks
        low_thread = threading.Thread(target=execute_on_thread,
                                      args=(tempdir, 'low_pipeline', -3, low_done))
        low_thread.daemon = True
        low_thread.start()

        time.sleep(1)  # sleep so that we don't hit any sqlite concurrency issues

        # enqueue hi-priority tasks
        hi_thread = threading.Thread(target=execute_on_thread,
                                     args=(tempdir, 'hi_pipeline', 3, hi_done))
        hi_thread.daemon = True
        hi_thread.start()

        time.sleep(5)  # sleep to give queue time to prioritize tasks

        # NOTE(review): the extent of this `with` block is reconstructed; confirm
        # whether the assertions below are meant to run while the worker is alive.
        with start_celery_worker():
            # Poll until both submitting threads have reported completion.
            while not low_done.is_set() or not hi_done.is_set():
                time.sleep(1)

        low_runs = instance.get_runs(filters=PipelineRunsFilter(
            pipeline_name='low_pipeline'))
        assert len(low_runs) == 1
        low_run = low_runs[0]
        lowstats = instance.get_run_stats(low_run.run_id)

        hi_runs = instance.get_runs(filters=PipelineRunsFilter(
            pipeline_name='hi_pipeline'))
        assert len(hi_runs) == 1
        hi_run = hi_runs[0]
        histats = instance.get_run_stats(hi_run.run_id)

        # Low-priority run was submitted (and started) first but must finish last.
        assert lowstats.start_time < histats.start_time
        assert lowstats.end_time > histats.end_time
def test_fetch_by_snapshot_id(self, storage):
    """Runs stored with a pipeline snapshot id can be fetched by that id."""
    assert storage
    pipeline_names = ("some_pipeline", "some_other_pipeline")

    definitions = [PipelineDefinition(name=name, solid_defs=[]) for name in pipeline_names]
    snapshots = [definition.get_pipeline_snapshot() for definition in definitions]
    snapshot_ids = [create_pipeline_snapshot_id(snapshot) for snapshot in snapshots]

    # Adding a snapshot must hand back the id computed for it above.
    for snapshot, snapshot_id in zip(snapshots, snapshot_ids):
        assert storage.add_pipeline_snapshot(snapshot) == snapshot_id

    run_ids = [make_new_run_id(), make_new_run_id()]
    for run_id, name, snapshot_id in zip(run_ids, pipeline_names, snapshot_ids):
        storage.add_run(
            TestRunStorage.build_run(
                run_id=run_id,
                pipeline_name=name,
                pipeline_snapshot_id=snapshot_id,
            )
        )

    assert len(storage.get_runs()) == 2

    # Each snapshot id selects exactly the one run created with it.
    for run_id, snapshot_id in zip(run_ids, snapshot_ids):
        matching = storage.get_runs(PipelineRunsFilter(snapshot_id=snapshot_id))
        assert len(matching) == 1
        assert matching[0].run_id == run_id
def _recent_failed_runs_text(instance):
    """Render up to five failed runs as fixed-width text.

    Args:
        instance: Object exposing ``get_runs(limit=..., filters=...)``.

    Returns:
        ``""`` when no failed runs come back; otherwise a
        ``"Recently failed runs:"`` header followed by one row per run holding
        the run id, pipeline name and status in left-aligned columns.
    """
    runs = instance.get_runs(
        limit=5, filters=PipelineRunsFilter(statuses=[PipelineRunStatus.FAILURE])
    )
    if not runs:
        return ""

    lines = [
        "{:<50}{:<50}{:<20}".format(run.run_id, run.pipeline_name, run.status)
        for run in runs
    ]
    return "Recently failed runs:\n{}".format("\n".join(lines))
def test_add_get_postgres_run_storage(clean_storage):
    """Round-trip a single run through the storage: add, fetch, filter, wipe."""
    storage = clean_storage
    new_run_id = make_new_run_id()
    expected_run = build_run(pipeline_name='pipeline_name', run_id=new_run_id)

    assert storage.add_run(expected_run)

    # The stored run is retrievable by id and compares equal to what was added.
    assert expected_run == storage.get_run_by_id(new_run_id)

    assert storage.has_run(new_run_id)
    unknown_run_id = make_new_run_id()
    assert not storage.has_run(unknown_run_id)

    assert storage.get_runs() == [expected_run]
    matching_filter = PipelineRunsFilter(pipeline_name='pipeline_name')
    assert storage.get_runs(matching_filter) == [expected_run]
    other_filter = PipelineRunsFilter(pipeline_name='nope')
    assert storage.get_runs(other_filter) == []

    # After a wipe nothing is left.
    storage.wipe()
    assert storage.get_runs() == []
def resolve_runs(self, graphene_info, **kwargs):
    """Resolve the runs of this backfill as ``GraphenePipelineRun`` objects.

    An optional ``limit`` keyword argument is forwarded to ``get_runs``.
    """
    from .pipelines.pipeline import GraphenePipelineRun

    backfill_filter = PipelineRunsFilter.for_backfill(self._backfill_id)
    backfill_runs = graphene_info.context.instance.get_runs(
        filters=backfill_filter,
        limit=kwargs.get("limit"),
    )

    wrapped = []
    for run in backfill_runs:
        wrapped.append(GraphenePipelineRun(run))
    return wrapped
def _fetch_runs_by_partition(instance, partition_set_def, status_filters=None):
    """Group the runs of a partition set by their ``dagster/partition`` tag.

    When ``status_filters`` is truthy, only runs whose status is contained in
    it are kept. Returns a ``defaultdict(list)`` keyed by partition tag value.
    """
    set_filter = PipelineRunsFilter(tags={"dagster/partition_set": partition_set_def.name})
    all_set_runs = instance.get_runs(set_filter)

    grouped = defaultdict(list)
    for candidate in all_set_runs:
        if status_filters and candidate.status not in status_filters:
            continue
        grouped[candidate.tags["dagster/partition"]].append(candidate)
    return grouped
def resolve_runs(self, graphene_info, **kwargs):
    """Resolve runs for this partition, optionally narrowed by a caller filter.

    The partition-set and partition-name tags are always part of the query.
    If a ``filter`` keyword argument is supplied, its selector's run ids,
    pipeline name, statuses and tags are combined with those tags.
    ``cursor`` and ``limit`` keyword arguments are forwarded to ``get_runs``.
    """
    requested_filter = kwargs.get("filter")
    partition_tags = {
        PARTITION_SET_TAG: self._external_partition_set.name,
        PARTITION_NAME_TAG: self._partition_name,
    }

    if requested_filter is None:
        runs_filter = PipelineRunsFilter(tags=partition_tags)
    else:
        selector = requested_filter.to_selector()
        runs_filter = PipelineRunsFilter(
            run_ids=selector.run_ids,
            pipeline_name=selector.pipeline_name,
            statuses=selector.statuses,
            tags=merge_dicts(selector.tags, partition_tags),
        )

    return get_runs(
        graphene_info,
        runs_filter,
        cursor=kwargs.get("cursor"),
        limit=kwargs.get("limit"),
    )
def _get_existing_run_for_request(instance, external_schedule, schedule_time, run_request):
    """Look up a run already stored for this schedule, time and (optional) run key.

    Returns the first matching run, or ``None`` when nothing matches.
    """
    schedule_tags = PipelineRun.tags_for_schedule(external_schedule)
    # The scheduled time is matched as a UTC ISO-8601 string.
    time_tag = {SCHEDULED_EXECUTION_TIME_TAG: schedule_time.in_tz("UTC").isoformat()}
    tags = merge_dicts(schedule_tags, time_tag)

    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    matches = instance.get_runs(PipelineRunsFilter(tags=tags))
    return matches[0] if len(matches) else None
def _fetch_runs_by_partition(instance, partition_set_def):
    """Group every run of a partition set by its ``dagster/partition`` tag.

    Returns a ``defaultdict(list)`` mapping partition tag value to runs.
    """
    # Select the runs tagged with this partition set's name.
    set_filter = PipelineRunsFilter(tags={'dagster/partition_set': partition_set_def.name})
    all_set_runs = instance.get_runs(set_filter)

    grouped = defaultdict(list)
    for partition_run in all_set_runs:
        partition_key = partition_run.tags['dagster/partition']
        grouped[partition_key].append(partition_run)
    return grouped
def resolve_numRequested(self, graphene_info):
    """Return how many partitions of this backfill have been requested.

    For a completed backfill this is the number of partition names. Otherwise
    it is the larger of (a) the number of runs stored for the backfill and
    (b) the 1-based position of the last submitted partition name within the
    partition list (0 if there is no checkpoint or it is not in the list).
    """
    backfill_job = self._backfill_job

    # A completed backfill needs no storage lookup, so return before querying.
    if backfill_job.status == BulkActionStatus.COMPLETED:
        return len(backfill_job.partition_names)

    filters = PipelineRunsFilter.for_backfill(backfill_job.backfill_id)
    run_count = graphene_info.context.instance.get_runs_count(filters)

    checkpoint = backfill_job.last_submitted_partition_name
    if checkpoint and checkpoint in backfill_job.partition_names:
        submitted_count = backfill_job.partition_names.index(checkpoint) + 1
    else:
        submitted_count = 0

    return max(run_count, submitted_count)
def test_run_record_timestamps():
    """A run executed under a frozen clock records that instant as start and end time."""
    with get_instance() as instance:
        central_midnight = create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central")
        freeze_datetime = to_timezone(central_midnight, "US/Pacific")

        with pendulum.test(freeze_datetime):
            result = my_job.execute_in_process(instance=instance)

            run_filter = PipelineRunsFilter(run_ids=[result.run_id])
            records = instance.get_run_records(filters=run_filter)
            assert len(records) == 1

            # Both timestamps must equal the frozen instant as a POSIX timestamp.
            only_record = records[0]
            assert only_record.start_time == 1572670800.0
            assert only_record.end_time == 1572670800.0
def tick_specific_data_from_dagster_tick(graphene_info, tick):
    """Build the status-specific GraphQL payload for a schedule tick.

    Returns:
        * ``GrapheneScheduleTickSuccessData`` for a successful tick, wrapping
          the record of the tick's first run id when one is found in storage
          and ``run=None`` otherwise;
        * ``GrapheneScheduleTickFailureData`` carrying ``tick.error`` for a
          failed tick;
        * ``None`` for any other status.
    """
    from ..pipelines.pipeline import GrapheneRun

    if tick.status == TickStatus.SUCCESS:
        if tick.run_ids:
            # Fetch once and check for emptiness instead of `has_run` followed
            # by indexing: a missing run then yields `run=None`, not IndexError.
            records = graphene_info.context.instance.get_run_records(
                PipelineRunsFilter(run_ids=[tick.run_ids[0]])
            )
            if records:
                return GrapheneScheduleTickSuccessData(run=GrapheneRun(records[0]))
        return GrapheneScheduleTickSuccessData(run=None)

    if tick.status == TickStatus.FAILURE:
        return GrapheneScheduleTickFailureData(error=tick.error)

    return None
def test_fetch_run_filter(self, storage):
    """Filtering by an explicit list of run ids returns and counts those runs."""
    assert storage
    one = make_new_run_id()
    two = make_new_run_id()

    for new_run_id in (one, two):
        storage.add_run(
            TestRunStorage.build_run(
                run_id=new_run_id,
                pipeline_name="some_pipeline",
                status=PipelineRunStatus.SUCCESS,
            )
        )

    assert len(storage.get_runs()) == 2

    # The fetch and the count are issued with equivalent run-id filters.
    fetched = storage.get_runs(PipelineRunsFilter(run_ids=[one, two]))
    counted = storage.get_runs_count(PipelineRunsFilter(run_ids=[one, two]))
    assert len(fetched) == 2
    assert counted == 2
def get_in_progress_runs_by_step(graphene_info, job_names, step_keys):
    """Group pending runs of the given jobs by step into unstarted / in-progress.

    Args:
        graphene_info: Resolver info whose ``context.instance`` is queried.
        job_names: Job names whose runs with a status in ``PENDING_STATUSES``
            are inspected.
        step_keys: Step keys passed to ``get_run_step_stats``.

    Returns:
        A list with one ``GrapheneInProgressRunsByStep`` per step key that has
        at least one unstarted or in-progress run.
    """
    from ..schema.pipelines.pipeline import GrapheneInProgressRunsByStep, GrapheneRun

    instance = graphene_info.context.instance

    in_progress_records = []
    for job_name in job_names:
        in_progress_records.extend(
            instance.get_run_records(
                PipelineRunsFilter(pipeline_name=job_name, statuses=PENDING_STATUSES)
            )
        )

    in_progress_runs_by_step = defaultdict(list)
    unstarted_runs_by_step = defaultdict(list)

    for record in in_progress_records:
        run = record.pipeline_run
        plan_step_keys = instance.get_execution_plan_snapshot(
            run.execution_plan_snapshot_id
        ).step_keys_to_execute

        if run.status not in IN_PROGRESS_STATUSES:
            # the run never began execution, all steps are unstarted
            for step_key in plan_step_keys:
                unstarted_runs_by_step[step_key].append(GrapheneRun(record))
            continue

        step_stats = instance.get_run_step_stats(run.run_id, step_keys)

        # step_stats only contains stats for steps that are in progress or
        # complete; collect their keys once so the membership test below is
        # O(1) instead of rescanning step_stats for every plan step.
        keys_with_stats = set()
        for step_stat in step_stats:
            keys_with_stats.add(step_stat.step_key)
            if step_stat.status == StepEventStatus.IN_PROGRESS:
                in_progress_runs_by_step[step_stat.step_key].append(GrapheneRun(record))

        for step_key in plan_step_keys:
            if step_key not in keys_with_stats:
                unstarted_runs_by_step[step_key].append(GrapheneRun(record))

    all_step_keys = in_progress_runs_by_step.keys() | unstarted_runs_by_step.keys()
    # `.get` (not indexing) so the defaultdicts are not grown while reading.
    return [
        GrapheneInProgressRunsByStep(
            key,
            unstarted_runs_by_step.get(key, []),
            in_progress_runs_by_step.get(key, []),
        )
        for key in all_step_keys
    ]
def get_in_progress_runs_by_step(graphene_info, job_names, step_keys):
    """Group in-progress runs of the given jobs by step into unstarted / in-progress.

    Args:
        graphene_info: Resolver info whose ``context.instance`` is queried.
        job_names: Job names whose runs with a status in
            ``IN_PROGRESS_STATUSES`` are inspected.
        step_keys: Step keys passed to ``get_run_step_stats``.

    Returns:
        A list with one ``GrapheneInProgressRunsByStep`` per step key that has
        at least one unstarted or in-progress run.
    """
    from ..schema.pipelines.pipeline import GrapheneInProgressRunsByStep, GrapheneRun

    instance = graphene_info.context.instance

    in_progress_records = []
    for job_name in job_names:
        in_progress_records.extend(
            instance.get_run_records(
                PipelineRunsFilter(pipeline_name=job_name, statuses=IN_PROGRESS_STATUSES)
            )
        )

    in_progress_runs_by_step = {}
    unstarted_runs_by_step = {}

    for record in in_progress_records:
        run = record.pipeline_run
        step_stats = instance.get_run_step_stats(run.run_id, step_keys)

        # step_stats only contains stats for steps that are in progress or
        # complete; collect their keys once so the membership test below is
        # O(1) instead of rescanning step_stats for every plan step.
        keys_with_stats = set()
        for step_stat in step_stats:
            keys_with_stats.add(step_stat.step_key)
            if step_stat.status == StepEventStatus.IN_PROGRESS:
                in_progress_runs_by_step.setdefault(step_stat.step_key, []).append(
                    GrapheneRun(record)
                )

        plan_step_keys = instance.get_execution_plan_snapshot(
            run.execution_plan_snapshot_id
        ).step_keys_to_execute

        for step_key in plan_step_keys:
            if step_key not in keys_with_stats:
                unstarted_runs_by_step.setdefault(step_key, []).append(GrapheneRun(record))

    return [
        GrapheneInProgressRunsByStep(
            key,
            unstarted_runs_by_step.get(key, []),
            in_progress_runs_by_step.get(key, []),
        )
        for key in in_progress_runs_by_step.keys() | unstarted_runs_by_step.keys()
    ]
def resolve_runs(self, graphene_info, **kwargs):
    """Resolve the runs launched by this sensor or schedule as ``GrapheneRun`` objects.

    When a truthy ``limit`` is given and a batch loader is available, the
    records come from the batch loader; otherwise they are queried from the
    instance with a sensor- or schedule-specific filter.
    """
    from .pipelines.pipeline import GrapheneRun

    limit = kwargs.get("limit")

    if limit and self._batch_loader:
        if self._job_state.job_type == InstigatorType.SENSOR:
            records = self._batch_loader.get_run_records_for_sensor(
                self._job_state.name, limit
            )
        else:
            records = self._batch_loader.get_run_records_for_schedule(
                self._job_state.name, limit
            )
        return [GrapheneRun(record) for record in records]

    build_filter = (
        PipelineRunsFilter.for_sensor
        if self._job_state.job_type == InstigatorType.SENSOR
        else PipelineRunsFilter.for_schedule
    )
    filters = build_filter(self._job_state)

    records = graphene_info.context.instance.get_run_records(
        filters=filters,
        limit=limit,
    )
    return [GrapheneRun(record) for record in records]
def resolve_runs(self, graphene_info):
    """Resolve this tick's runs, in the order of the tick's run ids.

    ``origin_run_ids`` are used when truthy, otherwise ``run_ids``. Run ids
    with no stored record are skipped.
    """
    from .pipelines.pipeline import GrapheneRun

    instance = graphene_info.context.instance
    run_ids = self._tick.origin_run_ids or self._tick.run_ids
    if not run_ids:
        return []

    # Index the fetched records by run id so the output follows `run_ids` order.
    records_by_id = {}
    for record in instance.get_run_records(PipelineRunsFilter(run_ids=run_ids)):
        records_by_id[record.pipeline_run.run_id] = record

    resolved = []
    for run_id in run_ids:
        if run_id in records_by_id:
            resolved.append(GrapheneRun(records_by_id[run_id]))
    return resolved
def test_0_12_0_add_mode_column(hostname, conn_string):
    """Check instance behavior before and after upgrading a pre-``mode``-column database.

    The database is rebuilt from a stored Postgres dump, a ``dagster.yaml`` is
    rendered for ``hostname``, and the instance is exercised before and after
    ``instance.upgrade()``.
    """
    dump_path = file_relative_path(
        __file__, "snapshot_0_11_16_pre_add_mode_column/postgres/pg_dump.txt"
    )
    _reconstruct_from_file(hostname, conn_string, dump_path)

    with tempfile.TemporaryDirectory() as tempdir:
        template_path = file_relative_path(__file__, "dagster.yaml")
        target_path = os.path.join(tempdir, "dagster.yaml")
        with open(template_path, "r") as template_fd, open(target_path, "w") as target_fd:
            target_fd.write(template_fd.read().format(hostname=hostname))

        instance = DagsterInstance.from_config(tempdir)

        # Ensure that you don't get a migration required exception if not trying to use the
        # migration-required column.
        assert len(instance.get_runs()) == 1

        @solid
        def basic():
            pass

        @pipeline
        def noop_pipeline():
            basic()

        # Ensure that you don't get a migration required exception when running a pipeline
        # pre-migration.
        pre_migration_result = execute_pipeline(noop_pipeline, instance=instance)
        assert pre_migration_result.success
        assert len(instance.get_runs()) == 2

        # Ensure that migration required exception throws, since you are trying to use the
        # migration-required column.
        expected_message = _migration_regex("run", current_revision="7cba9eeaaf1d")
        with pytest.raises(DagsterInstanceMigrationRequired, match=expected_message):
            instance.get_runs(filters=PipelineRunsFilter(mode="the_mode"))

        instance.upgrade()

        post_migration_result = execute_pipeline(noop_pipeline, instance=instance)
        assert post_migration_result.success
        assert len(instance.get_runs()) == 3
def test_fetch_by_pipeline(self, storage):
    """Filtering by pipeline name returns only that pipeline's run."""
    assert storage
    one = make_new_run_id()
    two = make_new_run_id()

    for new_run_id, name in ((one, "some_pipeline"), (two, "some_other_pipeline")):
        storage.add_run(TestRunStorage.build_run(run_id=new_run_id, pipeline_name=name))

    assert len(storage.get_runs()) == 2

    name_filter = PipelineRunsFilter(pipeline_name="some_pipeline")
    matching = storage.get_runs(name_filter)
    assert len(matching) == 1
    assert matching[0].run_id == one
def to_selector(self):
    """Convert this GraphQL input into a ``PipelineRunsFilter``.

    ``status`` is looked up by name in ``PipelineRunStatus``, ``tags`` becomes
    a key/value dict, and ``run_id`` is wrapped in a list (empty when unset).
    """
    status = PipelineRunStatus[self.status] if self.status else None

    tags = None
    if self.tags:
        # We are wrapping self.tags in a list because dauphin.List is not marked as iterable
        tags = {}
        for tag in list(self.tags):
            tags[tag['key']] = tag['value']

    run_ids = []
    if self.run_id:
        run_ids = [self.run_id]

    return PipelineRunsFilter(
        run_ids=run_ids,
        pipeline_name=self.pipeline_name,
        tags=tags,
        status=status,
    )
def last_empty_partition(context, partition_set_def):
    """Return the last partition that has no successful run, or ``None``.

    Partitions are scanned from the end of ``get_partitions()``; the first one
    whose runs contain no ``SUCCESS`` status is returned.
    """
    check.inst_param(context, 'context', ScheduleExecutionContext)
    partition_set_def = check.inst_param(
        partition_set_def, 'partition_set_def', PartitionSetDefinition
    )

    partitions = partition_set_def.get_partitions()
    if not partitions:
        return None

    for candidate in reversed(partitions):
        partition_filter = PipelineRunsFilter.for_partition(partition_set_def, candidate)
        partition_runs = context.instance.get_runs(partition_filter)
        if any(run.status == PipelineRunStatus.SUCCESS for run in partition_runs):
            continue
        return candidate

    # Every partition already has a successful run.
    return None
def get_in_progress_runs_for_job(graphene_info, job_name):
    """Fetch the runs of ``job_name`` whose status is one of the six listed below."""
    instance = graphene_info.context.instance

    unfinished_statuses = [
        PipelineRunStatus.STARTING,
        PipelineRunStatus.MANAGED,
        PipelineRunStatus.NOT_STARTED,
        PipelineRunStatus.QUEUED,
        PipelineRunStatus.STARTED,
        PipelineRunStatus.CANCELING,
    ]
    runs_filter = PipelineRunsFilter(pipeline_name=job_name, statuses=unfinished_statuses)
    return instance.get_runs(runs_filter)
def _fetch_last_run(instance, external_partition_set, partition_name):
    """Return one run for the given partition of a partition set, or ``None``.

    The query is limited to a single result; which run comes first depends on
    the ordering of ``instance.get_runs`` (presumably newest first -- confirm).
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(external_partition_set, "external_partition_set", ExternalPartitionSet)
    check.str_param(partition_name, "partition_name")

    pipeline_name = external_partition_set.pipeline_name
    partition_tags = {
        PARTITION_SET_TAG: external_partition_set.name,
        PARTITION_NAME_TAG: partition_name,
    }
    partition_filter = PipelineRunsFilter(pipeline_name=pipeline_name, tags=partition_tags)

    found = instance.get_runs(partition_filter, limit=1)
    if not found:
        return None
    return found[0]
def test_by_job(self, storage):
    """Bucketing by job with ``bucket_limit=1`` yields one run per job.

    Checked without a filter and with a tag filter that excludes the newest
    run of ``c_pipeline``.
    """
    if not storage.supports_bucket_queries:
        pytest.skip("storage cannot bucket")

    def _add_run(job_name, tags=None):
        new_run = TestRunStorage.build_run(
            pipeline_name=job_name, run_id=make_new_run_id(), tags=tags
        )
        return storage.add_run(new_run)

    def _single_run_per_job(**query_kwargs):
        bucket = JobBucket(
            job_names=["a_pipeline", "b_pipeline", "c_pipeline"], bucket_limit=1
        )
        found = {}
        for run in storage.get_runs(bucket_by=bucket, **query_kwargs):
            found[run.pipeline_name] = run
        return found

    _a_one = _add_run("a_pipeline", tags={"a": "A"})
    a_two = _add_run("a_pipeline", tags={"a": "A"})
    _b_one = _add_run("b_pipeline", tags={"a": "A"})
    b_two = _add_run("b_pipeline", tags={"a": "A"})
    c_one = _add_run("c_pipeline", tags={"a": "A"})
    c_two = _add_run("c_pipeline", tags={"a": "B"})

    runs_by_job = _single_run_per_job()
    assert set(runs_by_job.keys()) == {"a_pipeline", "b_pipeline", "c_pipeline"}
    assert runs_by_job.get("a_pipeline").run_id == a_two.run_id
    assert runs_by_job.get("b_pipeline").run_id == b_two.run_id
    assert runs_by_job.get("c_pipeline").run_id == c_two.run_id

    # fetch with a runs filter applied: c_two carries a different tag value,
    # so c_one is expected for c_pipeline.
    runs_by_job = _single_run_per_job(filters=PipelineRunsFilter(tags={"a": "A"}))
    assert set(runs_by_job.keys()) == {"a_pipeline", "b_pipeline", "c_pipeline"}
    assert runs_by_job.get("a_pipeline").run_id == a_two.run_id
    assert runs_by_job.get("b_pipeline").run_id == b_two.run_id
    assert runs_by_job.get("c_pipeline").run_id == c_one.run_id
def test_fetch_run_groups_filter(self, storage):
    """``get_run_groups`` with a FAILURE filter returns every group with its failed run.

    Three groups are stored, each consisting of a root run, one failed child
    and three children of that failed run (five runs per group).
    """
    assert storage

    root_runs = [
        TestRunStorage.build_run(run_id=make_new_run_id(), pipeline_name="foo_pipeline")
        for _ in range(3)
    ]
    runs = list(root_runs)

    for root_run in root_runs:
        failed_run_id = make_new_run_id()
        failed_run = TestRunStorage.build_run(
            run_id=failed_run_id,
            pipeline_name="foo_pipeline",
            tags={
                PARENT_RUN_ID_TAG: root_run.run_id,
                ROOT_RUN_ID_TAG: root_run.run_id
            },
            status=PipelineRunStatus.FAILURE,
        )
        runs.append(failed_run)

        # Three further runs hang off the failed run within the same group.
        runs.extend(
            TestRunStorage.build_run(
                run_id=make_new_run_id(),
                pipeline_name="foo_pipeline",
                tags={
                    PARENT_RUN_ID_TAG: failed_run_id,
                    ROOT_RUN_ID_TAG: root_run.run_id
                },
            )
            for _ in range(3)
        )

    for stored_run in runs:
        storage.add_run(stored_run)

    failure_filter = PipelineRunsFilter(status=PipelineRunStatus.FAILURE)
    run_groups = storage.get_run_groups(limit=5, filters=failure_filter)

    assert len(run_groups) == 3
    for group in run_groups.values():
        assert len(group["runs"]) == 2
        assert group["count"] == 5
def get_latest_asset_run_by_step_key(graphene_info, asset_nodes):
    """Find, per asset node, the latest recent run that executed its step.

    Args:
        graphene_info: Resolver info whose ``context.instance`` is queried.
        asset_nodes: Objects exposing ``job_names`` and ``op_name``.

    Returns:
        A dict keyed by step key (the node's ``op_name``) with
        ``GrapheneLatestRun`` values. A node whose jobs have no runs maps to a
        ``GrapheneLatestRun`` with no run. A node whose recent runs exist but
        none include its step has no entry in the dict.
    """
    from ..schema.pipelines.pipeline import (
        GrapheneLatestRun,
        GrapheneRun,
    )

    # Because it is expensive to deserialize PipelineRun objects, we limit this
    # query to retrieving the last 5 runs per job.

    instance = graphene_info.context.instance

    latest_run_by_step: Dict[str, GrapheneLatestRun] = {}

    for asset_node in asset_nodes:
        step_key = asset_node.op_name

        run_records = []
        for job_name in asset_node.job_names:
            run_records.extend(
                instance.get_run_records(PipelineRunsFilter(pipeline_name=job_name), limit=5)
            )

        if not run_records:
            latest_run_by_step[step_key] = GrapheneLatestRun(step_key, None)
            continue

        latest_record = None
        for record in run_records:
            run = record.pipeline_run
            executed_step_keys = instance.get_execution_plan_snapshot(
                run.execution_plan_snapshot_id
            ).step_keys_to_execute

            if step_key not in executed_step_keys:
                continue
            # Keep the record with the greatest creation timestamp.
            if (
                latest_record is None
                or record.create_timestamp > latest_record.create_timestamp
            ):
                latest_record = record

        if latest_record:
            latest_run_by_step[step_key] = GrapheneLatestRun(
                step_key, GrapheneRun(latest_record)
            )

    return latest_run_by_step
def poll_for_finished_run(instance, run_id=None, timeout=20, run_tags=None):
    """Poll until a run matching the criteria is SUCCESS, FAILURE or CANCELED.

    Args:
        instance: Object exposing ``get_runs(filters, limit=...)``.
        run_id: Optional run id to restrict the search to.
        timeout: Budget in seconds. Only the time spent sleeping between polls
            is counted against it, not the time spent querying.
        run_tags: Optional tags the run must carry.

    Returns:
        The first matching finished run.

    Raises:
        Exception: With message ``"Timed out"`` once the accumulated sleep time
            exceeds ``timeout``.
    """
    interval = 0.01
    slept = 0

    finished_statuses = [
        PipelineRunStatus.SUCCESS,
        PipelineRunStatus.FAILURE,
        PipelineRunStatus.CANCELED,
    ]
    filters = PipelineRunsFilter(
        run_ids=[run_id] if run_id else None,
        tags=run_tags,
        statuses=finished_statuses,
    )

    while True:
        finished = instance.get_runs(filters, limit=1)
        if finished:
            return finished[0]

        time.sleep(interval)
        slept += interval
        if slept > timeout:
            raise Exception("Timed out")
def _get_or_create_sensor_run(context, instance, repo_location,
                              external_sensor, external_pipeline,
                              run_request):
    """Return the run to launch for a sensor run request, or ``None``.

    * Without a run key a new run is always created.
    * With a run key and no stored run for it, a new run is created.
    * If a stored run exists and is still ``NOT_STARTED`` it is returned.
    * If a stored run exists in any other status, the tick is marked
      successful and ``None`` is returned.
    """
    if not run_request.run_key:
        return _create_sensor_run(context, instance, repo_location,
                                  external_sensor, external_pipeline,
                                  run_request)

    run_key_tags = merge_dicts(
        PipelineRun.tags_for_sensor(external_sensor),
        {RUN_KEY_TAG: run_request.run_key},
    )
    existing_runs = instance.get_runs(PipelineRunsFilter(tags=run_key_tags))

    if not len(existing_runs):
        context.logger.info(f"Creating new run for {external_sensor.name}")
        return _create_sensor_run(context, instance, repo_location,
                                  external_sensor, external_pipeline,
                                  run_request)

    check.invariant(len(existing_runs) == 1)
    run = existing_runs[0]

    if run.status == PipelineRunStatus.NOT_STARTED:
        context.logger.info(
            f"Run {run.run_id} already created with the run key "
            f"`{run_request.run_key}` for {external_sensor.name}")
        return run

    # A run already exists and was launched for this time period,
    # but the scheduler must have crashed before the tick could be put
    # into a SUCCESS state
    context.logger.info(
        f"Run {run.run_id} already completed with the run key "
        f"`{run_request.run_key}` for {external_sensor.name}")
    context.add_state(
        JobTickStatus.SUCCESS,
        run_id=run.run_id,
        run_key=run_request.run_key,
    )
    return None
def test_by_tag(self, storage):
    """Bucketing by tag value with ``bucket_limit=1`` yields one run per tag value.

    Checked without a filter and with a pipeline-name filter that excludes the
    only run tagged ``"4"``.
    """
    if not storage.supports_bucket_queries:
        pytest.skip("storage cannot bucket")

    def _add_run(job_name, tags=None):
        new_run = TestRunStorage.build_run(
            pipeline_name=job_name, run_id=make_new_run_id(), tags=tags
        )
        return storage.add_run(new_run)

    def _single_run_per_tag(**query_kwargs):
        bucket = TagBucket(tag_key="a", tag_values=["1", "2", "3", "4"], bucket_limit=1)
        found = {}
        for run in storage.get_runs(bucket_by=bucket, **query_kwargs):
            found[run.tags.get("a")] = run
        return found

    _one = _add_run("a", tags={"a": "1"})
    _two = _add_run("a", tags={"a": "2"})
    three = _add_run("a", tags={"a": "3"})
    _none = _add_run("a")
    b = _add_run("b", tags={"a": "4"})
    one = _add_run("a", tags={"a": "1"})
    two = _add_run("a", tags={"a": "2"})

    # For tag values "1" and "2" the later-added runs are expected.
    runs_by_tag = _single_run_per_tag()
    assert set(runs_by_tag.keys()) == {"1", "2", "3", "4"}
    assert runs_by_tag.get("1").run_id == one.run_id
    assert runs_by_tag.get("2").run_id == two.run_id
    assert runs_by_tag.get("3").run_id == three.run_id
    assert runs_by_tag.get("4").run_id == b.run_id

    # Restricting to pipeline "a" drops the run tagged "4", which is on "b".
    runs_by_tag = _single_run_per_tag(filters=PipelineRunsFilter(pipeline_name="a"))
    assert set(runs_by_tag.keys()) == {"1", "2", "3"}
    assert runs_by_tag.get("1").run_id == one.run_id
    assert runs_by_tag.get("2").run_id == two.run_id
    assert runs_by_tag.get("3").run_id == three.run_id