def test_run_priority_pipeline(rabbitmq):
    with tempfile.TemporaryDirectory() as tempdir:
        with instance_for_test(temp_dir=tempdir) as instance:
            low_done = threading.Event()
            hi_done = threading.Event()

            # enqueue low-priority tasks
            low_thread = threading.Thread(
                target=execute_on_thread,
                args=("low_pipeline", low_done, instance.get_ref()),
                kwargs={
                    "tempdir": tempdir,
                    "tags": {DAGSTER_CELERY_RUN_PRIORITY_TAG: "-3"},
                },
            )
            low_thread.daemon = True
            low_thread.start()

            time.sleep(1)  # sleep so that we don't hit any sqlite concurrency issues

            # enqueue hi-priority tasks
            hi_thread = threading.Thread(
                target=execute_on_thread,
                args=("hi_pipeline", hi_done, instance.get_ref()),
                kwargs={
                    "tempdir": tempdir,
                    "tags": {DAGSTER_CELERY_RUN_PRIORITY_TAG: "3"},
                },
            )
            hi_thread.daemon = True
            hi_thread.start()

            time.sleep(5)  # sleep to give queue time to prioritize tasks

            with start_celery_worker():
                while not low_done.is_set() or not hi_done.is_set():
                    time.sleep(1)

                low_runs = instance.get_runs(filters=RunsFilter(pipeline_name="low_pipeline"))
                assert len(low_runs) == 1
                low_run = low_runs[0]
                lowstats = instance.get_run_stats(low_run.run_id)

                hi_runs = instance.get_runs(filters=RunsFilter(pipeline_name="hi_pipeline"))
                assert len(hi_runs) == 1
                hi_run = hi_runs[0]
                histats = instance.get_run_stats(hi_run.run_id)

                # although enqueued later, the high-priority run should start after and
                # finish before the low-priority run
                assert lowstats.start_time < histats.start_time
                assert lowstats.end_time > histats.end_time
def test_execute_schedule_on_celery_k8s(  # pylint: disable=redefined-outer-name, unused-argument
    dagster_instance_for_daemon, helm_namespace_for_daemon
):
    schedule_name = "frequent_celery"
    with get_test_project_external_schedule(
        dagster_instance_for_daemon, schedule_name
    ) as external_schedule:
        reoriginated_schedule = ReOriginatedExternalScheduleForTest(external_schedule)
        dagster_instance_for_daemon.start_schedule(reoriginated_schedule)

        scheduler_runs = dagster_instance_for_daemon.get_runs(
            RunsFilter(tags=PipelineRun.tags_for_schedule(reoriginated_schedule))
        )

        assert len(scheduler_runs) == 0

        try:
            start_time = time.time()
            while True:
                schedule_runs = dagster_instance_for_daemon.get_runs(
                    RunsFilter(tags=PipelineRun.tags_for_schedule(reoriginated_schedule))
                )

                if len(schedule_runs) > 0:
                    break

                if time.time() - start_time > 120:
                    raise Exception(
                        "Timed out waiting for schedule to start a run. "
                        "Check the dagster-daemon pod logs to see why it didn't start."
                    )

                time.sleep(1)
        finally:
            dagster_instance_for_daemon.stop_schedule(
                reoriginated_schedule.get_external_origin_id(),
                reoriginated_schedule.selector_id,
                reoriginated_schedule,
            )

        last_run = schedule_runs[0]

        finished_pipeline_run = poll_for_finished_run(
            dagster_instance_for_daemon, last_run.run_id, timeout=120
        )

        assert finished_pipeline_run.is_success
def _get_queued_runs(self, instance):
    queued_runs_filter = RunsFilter(statuses=[PipelineRunStatus.QUEUED])

    # get_runs returns runs newest-first, so reverse for FIFO ordering
    # Note: should add a maximum fetch limit https://github.com/dagster-io/dagster/issues/3339
    runs = instance.get_runs(filters=queued_runs_filter)[::-1]
    return runs
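# A hedged sketch related to the TODO above: naively passing `limit` to get_runs (as
# other snippets here do) would cap the fetch, but since get_runs returns newest-first,
# the oldest queued runs could be starved; a complete fix would need an ascending sort
# in storage (see the linked issue). The cap value below is hypothetical.
def _get_queued_runs_capped(instance, max_fetch=1000):
    queued = instance.get_runs(
        filters=RunsFilter(statuses=[PipelineRunStatus.QUEUED]),
        limit=max_fetch,
    )
    return queued[::-1]  # still newest-first within the capped window, then reversed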
def execute_monitoring_iteration(instance, workspace, logger, _debug_crash_flags=None):
    check.invariant(
        instance.run_launcher.supports_check_run_worker_health,
        "Must use a supported run launcher",
    )

    # TODO: consider limiting number of runs to fetch
    runs = instance.get_runs(filters=RunsFilter(statuses=IN_PROGRESS_RUN_STATUSES))

    logger.info(f"Collected {len(runs)} runs for monitoring")

    for run in runs:
        try:
            logger.info(f"Checking run {run.run_id}")

            if run.status == PipelineRunStatus.STARTING:
                monitor_starting_run(instance, run, logger)
            elif run.status == PipelineRunStatus.STARTED:
                monitor_started_run(instance, workspace, run, logger)
            elif run.status == PipelineRunStatus.CANCELING:
                # TODO: implement canceling timeouts
                pass
            else:
                check.invariant(False, f"Unexpected run status: {run.status}")
        except Exception:
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            logger.error(f"Hit error while monitoring run {run.run_id}: {str(error_info)}")
            yield error_info
        else:
            yield
def terminate_pipeline_execution(instance, run_id, terminate_policy):
    from ...schema.errors import GrapheneRunNotFoundError
    from ...schema.pipelines.pipeline import GrapheneRun
    from ...schema.roots.mutation import (
        GrapheneTerminateRunFailure,
        GrapheneTerminateRunPolicy,
        GrapheneTerminateRunSuccess,
    )

    check.inst_param(instance, "instance", DagsterInstance)
    check.str_param(run_id, "run_id")

    records = instance.get_run_records(RunsFilter(run_ids=[run_id]))

    force_mark_as_canceled = (
        terminate_policy == GrapheneTerminateRunPolicy.MARK_AS_CANCELED_IMMEDIATELY
    )

    if not records:
        return GrapheneRunNotFoundError(run_id)

    record = records[0]
    run = record.pipeline_run
    graphene_run = GrapheneRun(record)

    valid_status = not run.is_finished and (
        force_mark_as_canceled
        or (run.status == PipelineRunStatus.STARTED or run.status == PipelineRunStatus.QUEUED)
    )

    if not valid_status:
        return GrapheneTerminateRunFailure(
            run=graphene_run,
            message="Run {run_id} could not be terminated due to having status {status}.".format(
                run_id=run.run_id, status=run.status.value
            ),
        )

    if force_mark_as_canceled:
        try:
            if instance.run_coordinator and instance.run_coordinator.can_cancel_run(run_id):
                instance.run_coordinator.cancel_run(run_id)
        except Exception:
            instance.report_engine_event(
                "Exception while attempting to force-terminate run. Run will still be marked as "
                "canceled.",
                pipeline_name=run.pipeline_name,
                run_id=run.run_id,
                engine_event_data=EngineEventData(
                    error=serializable_error_info_from_exc_info(sys.exc_info()),
                ),
            )
        return _force_mark_as_canceled(instance, run_id)

    if (
        instance.run_coordinator
        and instance.run_coordinator.can_cancel_run(run_id)
        and instance.run_coordinator.cancel_run(run_id)
    ):
        return GrapheneTerminateRunSuccess(graphene_run)

    return GrapheneTerminateRunFailure(
        run=graphene_run,
        message="Unable to terminate run {run_id}".format(run_id=run.run_id),
    )
def get_run_groups(graphene_info, filters=None, cursor=None, limit=None):
    from ..schema.pipelines.pipeline import GrapheneRun
    from ..schema.runs import GrapheneRunGroup

    check.opt_inst_param(filters, "filters", RunsFilter)
    check.opt_str_param(cursor, "cursor")
    check.opt_int_param(limit, "limit")

    instance = graphene_info.context.instance
    run_groups = instance.get_run_groups(filters=filters, cursor=cursor, limit=limit)

    run_ids = {
        run.run_id for run_group in run_groups.values() for run in run_group.get("runs", [])
    }
    records_by_ids = {
        record.pipeline_run.run_id: record
        for record in instance.get_run_records(RunsFilter(run_ids=list(run_ids)))
    }

    for root_run_id in run_groups:
        run_groups[root_run_id]["runs"] = [
            GrapheneRun(records_by_ids.get(run.run_id))
            for run in run_groups[root_run_id]["runs"]
        ]

    return [
        GrapheneRunGroup(root_run_id=root_run_id, runs=run_group["runs"])
        for root_run_id, run_group in run_groups.items()
    ]
def get_partition_set_partition_statuses(graphene_info, repository_handle, partition_set_name):
    from ..schema.partition_sets import GraphenePartitionStatus, GraphenePartitionStatuses

    check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
    check.str_param(partition_set_name, "partition_set_name")

    result = graphene_info.context.get_external_partition_names(
        repository_handle, partition_set_name
    )
    all_partition_set_runs = graphene_info.context.instance.get_runs(
        RunsFilter(tags={PARTITION_SET_TAG: partition_set_name})
    )
    runs_by_partition = {}
    for run in all_partition_set_runs:
        partition_name = run.tags.get(PARTITION_NAME_TAG)
        if not partition_name or partition_name in runs_by_partition:
            # all_partition_set_runs is in descending order by creation time; ignore
            # runs for a partition we have already considered
            continue
        runs_by_partition[partition_name] = run

    return GraphenePartitionStatuses(
        results=[
            GraphenePartitionStatus(
                id=f"{partition_set_name}:{partition_name}",
                partitionName=partition_name,
                runStatus=runs_by_partition[partition_name].status
                if runs_by_partition.get(partition_name)
                else None,
            )
            for partition_name in result.partition_names
        ]
    )
def to_selector(self):
    if self.tags:
        # We are wrapping self.tags in a list because graphene.List is not marked as iterable
        tags = {tag["key"]: tag["value"] for tag in list(self.tags)}
    else:
        tags = None

    if self.statuses:
        statuses = [
            PipelineRunStatus[status]
            for status in self.statuses  # pylint: disable=not-an-iterable
        ]
    else:
        statuses = None

    updated_after = pendulum.from_timestamp(self.updatedAfter) if self.updatedAfter else None
    created_before = pendulum.from_timestamp(self.createdBefore) if self.createdBefore else None

    return RunsFilter(
        run_ids=self.runIds if self.runIds else None,
        pipeline_name=self.pipelineName,
        tags=tags,
        statuses=statuses,
        snapshot_id=self.snapshotId,
        updated_after=updated_after,
        mode=self.mode,
        created_before=created_before,
    )
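# Illustrative only: a GraphQL filter input such as
#   {"pipelineName": "my_job", "statuses": ["SUCCESS"], "tags": [{"key": "team", "value": "data"}]}
# would be converted by to_selector into a RunsFilter equivalent to this one
# (the job name and tag values are hypothetical):
example_selector = RunsFilter(
    pipeline_name="my_job",
    statuses=[PipelineRunStatus.SUCCESS],
    tags={"team": "data"},
)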
def _get_partitions_chunk(instance, logger, backfill_job, chunk_size):
    check.inst_param(backfill_job, "backfill_job", PartitionBackfill)
    partition_names = backfill_job.partition_names
    checkpoint = backfill_job.last_submitted_partition_name

    if (
        backfill_job.last_submitted_partition_name
        and backfill_job.last_submitted_partition_name in partition_names
    ):
        index = partition_names.index(backfill_job.last_submitted_partition_name)
        partition_names = partition_names[index + 1 :]

    # for idempotence, fetch all runs with the current backfill id
    backfill_runs = instance.get_runs(
        RunsFilter(tags=PipelineRun.tags_for_backfill_id(backfill_job.backfill_id))
    )
    completed_partitions = set([run.tags.get(PARTITION_NAME_TAG) for run in backfill_runs])
    initial_checkpoint = (
        partition_names.index(checkpoint) + 1
        if checkpoint and checkpoint in partition_names
        else 0
    )
    partition_names = partition_names[initial_checkpoint:]
    has_more = chunk_size < len(partition_names)
    partitions_chunk = partition_names[:chunk_size]
    next_checkpoint = partitions_chunk[-1]

    to_skip = set(partitions_chunk).intersection(completed_partitions)
    if to_skip:
        logger.info(
            f"Found {len(to_skip)} existing runs for backfill {backfill_job.backfill_id}, skipping"
        )
    to_submit = [
        partition_name
        for partition_name in partitions_chunk
        if partition_name not in completed_partitions
    ]
    return to_submit, next_checkpoint, has_more
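# A worked example of the chunking above, with hypothetical values: given
# partition_names = ["a", "b", "c", "d"], checkpoint = "b", and chunk_size = 1,
# the candidate list becomes ["c", "d"], partitions_chunk is ["c"],
# next_checkpoint is "c", and has_more is True. Any partitions in the chunk that
# already have runs tagged with the backfill id are dropped from to_submit.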
def _force_mark_as_canceled(instance, run_id):
    from ...schema.pipelines.pipeline import GrapheneRun
    from ...schema.roots.mutation import GrapheneTerminateRunSuccess

    reloaded_record = instance.get_run_records(RunsFilter(run_ids=[run_id]))[0]

    if not reloaded_record.pipeline_run.is_finished:
        message = (
            "This pipeline was forcibly marked as canceled from outside the execution context. The "
            "computational resources created by the run may not have been fully cleaned up."
        )
        instance.report_run_canceled(reloaded_record.pipeline_run, message=message)
        reloaded_record = instance.get_run_records(RunsFilter(run_ids=[run_id]))[0]

    return GrapheneTerminateRunSuccess(GrapheneRun(reloaded_record))
def get_run_by_id(graphene_info, run_id):
    from ..schema.errors import GrapheneRunNotFoundError
    from ..schema.pipelines.pipeline import GrapheneRun

    instance = graphene_info.context.instance
    records = instance.get_run_records(RunsFilter(run_ids=[run_id]))
    if not records:
        return GrapheneRunNotFoundError(run_id)
    else:
        return GrapheneRun(records[0])
def resolve_runs(self, graphene_info, **kwargs):
    filters = kwargs.get("filter")
    partition_tags = {
        PARTITION_SET_TAG: self._external_partition_set.name,
        PARTITION_NAME_TAG: self._partition_name,
    }
    if filters is not None:
        filters = filters.to_selector()
        runs_filter = RunsFilter(
            run_ids=filters.run_ids,
            pipeline_name=filters.job_name,
            statuses=filters.statuses,
            tags=merge_dicts(filters.tags, partition_tags),
        )
    else:
        runs_filter = RunsFilter(tags=partition_tags)

    return get_runs(
        graphene_info, runs_filter, cursor=kwargs.get("cursor"), limit=kwargs.get("limit")
    )
def _recent_failed_runs_text(instance):
    lines = []
    runs = instance.get_runs(
        limit=5,
        filters=RunsFilter(statuses=[PipelineRunStatus.FAILURE, PipelineRunStatus.CANCELED]),
    )
    if not runs:
        return ""
    for run in runs:
        lines.append("{:<50}{:<50}{:<20}".format(run.run_id, run.pipeline_name, run.status))
    return "Recently failed runs:\n{}".format("\n".join(lines))
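# A hedged usage sketch: rendering the summary against the instance resolved from
# DAGSTER_HOME. `DagsterInstance.get()` is the standard accessor for the local instance.
def _print_recent_failures():
    from dagster import DagsterInstance

    print(_recent_failed_runs_text(DagsterInstance.get()))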
def resolve_runs(self, graphene_info, **kwargs):
    from .pipelines.pipeline import GrapheneRun

    filters = RunsFilter.for_backfill(self._backfill_job.backfill_id)
    return [
        GrapheneRun(record)
        for record in graphene_info.context.instance.get_run_records(
            filters=filters,
            limit=kwargs.get("limit"),
        )
    ]
def _fetch_runs_by_partition(instance, partition_set_def, status_filters=None):
    # query runs db for this partition set
    filters = RunsFilter(tags={"dagster/partition_set": partition_set_def.name})
    partition_set_runs = instance.get_runs(filters)

    runs_by_partition = defaultdict(list)

    for run in partition_set_runs:
        if not status_filters or run.status in status_filters:
            runs_by_partition[run.tags["dagster/partition"]].append(run)

    return runs_by_partition
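# A usage sketch for the helper above, counting successful runs per partition.
# `instance` and `my_partition_set_def` are hypothetical and assumed to be in scope.
def _summarize_partition_successes(instance, my_partition_set_def):
    runs_by_partition = _fetch_runs_by_partition(
        instance, my_partition_set_def, status_filters=[PipelineRunStatus.SUCCESS]
    )
    return {name: len(runs) for name, runs in runs_by_partition.items()}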
def get_latest_asset_run_by_step_key(graphene_info, job_names, asset_nodes):
    from ..schema.pipelines.pipeline import GrapheneLatestRun, GrapheneRun

    # This method returns the latest run that has occurred for a given step.
    # Because it is expensive to deserialize PipelineRun objects, we limit this
    # query to retrieving the last 5 runs per job. If no runs have occurred, we return
    # a GrapheneLatestRun object with no run. If none of the latest runs contain the
    # step key, we return None.
    instance = graphene_info.context.instance

    latest_run_by_step: Dict[str, PipelineRun] = {}

    run_records_by_job = {
        job_name: instance.get_run_records(RunsFilter(pipeline_name=job_name), limit=5)
        for job_name in job_names
    }

    get_step_keys = lambda record: graphene_info.context.instance.get_execution_plan_snapshot(
        record.pipeline_run.execution_plan_snapshot_id
    ).step_keys_to_execute

    # step_keys list is in same order as records list in get_step_keys
    record_step_keys_by_job = {
        job_name: [get_step_keys(record) for record in run_records]
        for job_name, run_records in run_records_by_job.items()
    }

    for asset_node in asset_nodes:
        asset_job_names = asset_node.job_names
        step_key = asset_node.op_name

        total_num_records = sum(
            [len(run_records_by_job.get(job_name, [])) for job_name in asset_job_names]
        )
        if total_num_records == 0:
            latest_run_by_step[step_key] = GrapheneLatestRun(step_key, None)
            continue  # no records to inspect for this step

        latest_run = None
        for job_name in asset_job_names:
            record_step_keys = record_step_keys_by_job.get(job_name, [])
            for i in range(len(record_step_keys)):
                if step_key in record_step_keys[i]:
                    record = run_records_by_job.get(job_name)[i]
                    if latest_run is None or record.create_timestamp > latest_run.create_timestamp:
                        latest_run = record

        if latest_run:
            latest_run_by_step[step_key] = GrapheneLatestRun(step_key, GrapheneRun(latest_run))

    return latest_run_by_step
def _fetch_existing_runs(instance, external_sensor, run_requests):
    run_keys = [run_request.run_key for run_request in run_requests if run_request.run_key]

    if not run_keys:
        return {}

    existing_runs = {}

    if instance.supports_bucket_queries:
        runs = instance.get_runs(
            filters=RunsFilter(
                tags=PipelineRun.tags_for_sensor(external_sensor),
            ),
            bucket_by=TagBucket(
                tag_key=RUN_KEY_TAG,
                bucket_limit=1,
                tag_values=run_keys,
            ),
        )
        for run in runs:
            tags = run.tags or {}
            run_key = tags.get(RUN_KEY_TAG)
            existing_runs[run_key] = run
        return existing_runs

    else:
        for run_key in run_keys:
            runs = instance.get_runs(
                filters=RunsFilter(
                    tags=merge_dicts(
                        PipelineRun.tags_for_sensor(external_sensor),
                        {RUN_KEY_TAG: run_key},
                    )
                ),
                limit=1,
            )
            if runs:
                existing_runs[run_key] = runs[0]
        return existing_runs
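# A hedged sketch of how a sensor evaluation loop might use the helper above to skip
# already-submitted run keys. `external_sensor` is hypothetical; RunRequest is the
# standard dagster type carrying an optional run_key for idempotence.
def _requests_to_submit(instance, external_sensor, run_requests):
    existing = _fetch_existing_runs(instance, external_sensor, run_requests)
    return [
        run_request
        for run_request in run_requests
        if not run_request.run_key or run_request.run_key not in existing
    ]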
def resolve_numRequested(self, graphene_info):
    filters = RunsFilter.for_backfill(self._backfill_job.backfill_id)
    run_count = graphene_info.context.instance.get_runs_count(filters)
    if self._backfill_job.status == BulkActionStatus.COMPLETED:
        return len(self._backfill_job.partition_names)

    checkpoint = self._backfill_job.last_submitted_partition_name
    return max(
        run_count,
        self._backfill_job.partition_names.index(checkpoint) + 1
        if checkpoint and checkpoint in self._backfill_job.partition_names
        else 0,
    )
def tick_specific_data_from_dagster_tick(graphene_info, tick):
    from ..pipelines.pipeline import GrapheneRun

    if tick.status == TickStatus.SUCCESS:
        if tick.run_ids and graphene_info.context.instance.has_run(tick.run_ids[0]):
            record = graphene_info.context.instance.get_run_records(
                RunsFilter(run_ids=[tick.run_ids[0]])
            )[0]
            return GrapheneScheduleTickSuccessData(run=GrapheneRun(record))
        return GrapheneScheduleTickSuccessData(run=None)
    elif tick.status == TickStatus.FAILURE:
        error = tick.error
        return GrapheneScheduleTickFailureData(error=error)
def get_in_progress_runs_by_step(graphene_info, job_names, step_keys):
    from ..schema.pipelines.pipeline import GrapheneInProgressRunsByStep, GrapheneRun

    instance = graphene_info.context.instance

    in_progress_records = []
    for job_name in job_names:
        in_progress_records.extend(
            instance.get_run_records(RunsFilter(pipeline_name=job_name, statuses=PENDING_STATUSES))
        )

    in_progress_runs_by_step = defaultdict(list)
    unstarted_runs_by_step = defaultdict(list)

    for record in in_progress_records:
        run = record.pipeline_run

        step_keys_in_plan = graphene_info.context.instance.get_execution_plan_snapshot(
            run.execution_plan_snapshot_id
        ).step_keys_to_execute

        if run.status in IN_PROGRESS_STATUSES:
            step_stats = graphene_info.context.instance.get_run_step_stats(run.run_id, step_keys)
            for step_stat in step_stats:
                if step_stat.status == StepEventStatus.IN_PROGRESS:
                    in_progress_runs_by_step[step_stat.step_key].append(GrapheneRun(record))

            for step_key in step_keys_in_plan:
                # step_stats only contains stats for steps that are in progress or complete
                is_unstarted = (
                    len([step_stat for step_stat in step_stats if step_stat.step_key == step_key])
                    == 0
                )
                if is_unstarted:
                    unstarted_runs_by_step[step_key].append(GrapheneRun(record))
        else:
            # the run never began execution, all steps are unstarted
            for step_key in step_keys_in_plan:
                unstarted_runs_by_step[step_key].append(GrapheneRun(record))

    all_step_keys = in_progress_runs_by_step.keys() | unstarted_runs_by_step.keys()
    return [
        GrapheneInProgressRunsByStep(
            key,
            unstarted_runs_by_step.get(key, []),
            in_progress_runs_by_step.get(key, []),
        )
        for key in all_step_keys
    ]
def _get_existing_run_for_request(instance, external_schedule, schedule_time, run_request):
    tags = merge_dicts(
        PipelineRun.tags_for_schedule(external_schedule),
        {
            SCHEDULED_EXECUTION_TIME_TAG: to_timezone(schedule_time, "UTC").isoformat(),
        },
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key
    runs_filter = RunsFilter(tags=tags)
    existing_runs = instance.get_runs(runs_filter)
    if not existing_runs:
        return None
    return existing_runs[0]
def test_0_12_0_add_mode_column(hostname, conn_string):
    _reconstruct_from_file(
        hostname,
        conn_string,
        file_relative_path(
            __file__, "snapshot_0_11_16_pre_add_mode_column/postgres/pg_dump.txt"
        ),
    )

    with tempfile.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        # Ensure that you don't get a migration-required exception if not trying to use the
        # migration-required column.
        assert len(instance.get_runs()) == 1

        @solid
        def basic():
            pass

        @pipeline
        def noop_pipeline():
            basic()

        # Ensure that you don't get a migration-required exception when running a pipeline
        # pre-migration.
        result = execute_pipeline(noop_pipeline, instance=instance)
        assert result.success
        assert len(instance.get_runs()) == 2

        # Ensure that the migration-required exception is raised, since you are trying to use
        # the migration-required column.
        with pytest.raises(
            DagsterInstanceSchemaOutdated,
            match=_migration_regex(current_revision="7cba9eeaaf1d"),
        ):
            instance.get_runs(filters=RunsFilter(mode="the_mode"))

        instance.upgrade()

        result = execute_pipeline(noop_pipeline, instance=instance)
        assert result.success
        assert len(instance.get_runs()) == 3
def _launch_pipeline_execution(graphene_info, execution_params, is_reexecuted=False):
    from ...schema.pipelines.pipeline import GrapheneRun
    from ...schema.runs import GrapheneLaunchRunSuccess

    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.inst_param(execution_params, "execution_params", ExecutionParams)
    check.bool_param(is_reexecuted, "is_reexecuted")

    run = do_launch(graphene_info, execution_params, is_reexecuted)
    records = graphene_info.context.instance.get_run_records(RunsFilter(run_ids=[run.run_id]))
    return GrapheneLaunchRunSuccess(run=GrapheneRun(records[0]))
def resolve_runs(self, graphene_info, **kwargs):
    from .pipelines.pipeline import GrapheneRun

    if kwargs.get("limit") and self._batch_loader:
        limit = kwargs["limit"]
        records = (
            self._batch_loader.get_run_records_for_sensor(self._instigator_state.name, limit)
            if self._instigator_state.instigator_type == InstigatorType.SENSOR
            else self._batch_loader.get_run_records_for_schedule(
                self._instigator_state.name, limit
            )
        )
        return [GrapheneRun(record) for record in records]

    if self._instigator_state.instigator_type == InstigatorType.SENSOR:
        filters = RunsFilter.for_sensor(self._instigator_state)
    else:
        filters = RunsFilter.for_schedule(self._instigator_state)
    return [
        GrapheneRun(record)
        for record in graphene_info.context.instance.get_run_records(
            filters=filters,
            limit=kwargs.get("limit"),
        )
    ]
def resolve_runs(self, graphene_info):
    from .pipelines.pipeline import GrapheneRun

    instance = graphene_info.context.instance
    run_ids = self._tick.origin_run_ids or self._tick.run_ids
    if not run_ids:
        return []

    records_by_id = {
        record.pipeline_run.run_id: record
        for record in instance.get_run_records(RunsFilter(run_ids=run_ids))
    }

    return [GrapheneRun(records_by_id[run_id]) for run_id in run_ids if run_id in records_by_id]
def _fetch_last_run(instance, external_partition_set, partition_name):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(external_partition_set, "external_partition_set", ExternalPartitionSet)
    check.str_param(partition_name, "partition_name")

    runs = instance.get_runs(
        RunsFilter(
            pipeline_name=external_partition_set.pipeline_name,
            tags={
                PARTITION_SET_TAG: external_partition_set.name,
                PARTITION_NAME_TAG: partition_name,
            },
        ),
        limit=1,
    )

    return runs[0] if runs else None
def last_empty_partition(context, partition_set_def):
    check.inst_param(context, "context", ScheduleEvaluationContext)
    partition_set_def = check.inst_param(
        partition_set_def, "partition_set_def", PartitionSetDefinition
    )

    partitions = partition_set_def.get_partitions(context.scheduled_execution_time)
    if not partitions:
        return None
    selected = None
    for partition in reversed(partitions):
        filters = RunsFilter.for_partition(partition_set_def, partition)
        matching = context.instance.get_runs(filters)
        if not any(run.status == PipelineRunStatus.SUCCESS for run in matching):
            selected = partition
            break
    return selected
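# A usage sketch with assumptions noted: selecting the most recent partition with no
# successful run from inside a schedule's run-config function. `my_partition_set` is
# hypothetical, and `run_config_for_partition` is assumed to be the
# PartitionSetDefinition method that builds run config for a given partition.
def run_config_fn(context):
    partition = last_empty_partition(context, my_partition_set)
    if partition is None:
        return {}  # every partition already has a successful run; nothing to fill
    return my_partition_set.run_config_for_partition(partition)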
def resolve_stepStats(self, _graphene_info, limit):
    if self._solid.get_is_dynamic_mapped():
        return GrapheneSolidStepStatsUnavailableError(
            message="Step stats are not available for dynamically-mapped ops"
        )
    if self._solid.get_is_composite():
        return GrapheneSolidStepStatsUnavailableError(
            message="Step stats are not available for composite solids / subgraphs"
        )

    instance = _graphene_info.context.instance
    runs_filter = RunsFilter(pipeline_name=self._solid.get_pipeline_name())
    runs = instance.get_runs(runs_filter, limit=limit)
    nodes = []
    for run in runs:
        stats = instance.get_run_step_stats(run.run_id, [str(self.handleID)])
        if len(stats):
            nodes.append(GrapheneRunStepStats(stats[0]))
    return GrapheneSolidStepStatsConnection(nodes=nodes)
def get_pipeline_run_observable(graphene_info, run_id, after=None):
    from ...schema.pipelines.pipeline import GrapheneRun
    from ...schema.pipelines.subscription import (
        GraphenePipelineRunLogsSubscriptionFailure,
        GraphenePipelineRunLogsSubscriptionSuccess,
    )
    from ..events import from_event_record

    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.str_param(run_id, "run_id")
    check.opt_int_param(after, "after")
    instance = graphene_info.context.instance
    records = instance.get_run_records(RunsFilter(run_ids=[run_id]))

    if not records:

        def _get_error_observable(observer):
            observer.on_next(
                GraphenePipelineRunLogsSubscriptionFailure(
                    missingRunId=run_id,
                    message="Could not load run with id {}".format(run_id),
                )
            )

        return Observable.create(_get_error_observable)  # pylint: disable=E1101

    record = records[0]
    run = record.pipeline_run

    def _handle_events(payload):
        events, loading_past = payload
        return GraphenePipelineRunLogsSubscriptionSuccess(
            run=GrapheneRun(record),
            messages=[from_event_record(event, run.pipeline_name) for event in events],
            hasMorePastEvents=loading_past,
        )

    return Observable.create(  # pylint: disable=E1101
        PipelineRunObservableSubscribe(instance, run_id, after_cursor=after)
    ).map(_handle_events)
def poll_for_finished_run(instance, run_id=None, timeout=20, run_tags=None):
    total_time = 0
    interval = 0.01

    filters = RunsFilter(
        run_ids=[run_id] if run_id else None,
        tags=run_tags,
        statuses=[
            PipelineRunStatus.SUCCESS,
            PipelineRunStatus.FAILURE,
            PipelineRunStatus.CANCELED,
        ],
    )

    while True:
        runs = instance.get_runs(filters, limit=1)
        if runs:
            return runs[0]
        else:
            time.sleep(interval)
            total_time += interval
            if total_time > timeout:
                raise Exception("Timed out")
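# A usage sketch for the polling helper above. `submit_run_for_test` is a hypothetical
# stand-in for however the test launches its run; the assertion relies on the helper
# returning only once the run reaches a terminal status.
def test_my_run_finishes(instance):
    run = submit_run_for_test(instance)  # hypothetical helper
    finished = poll_for_finished_run(instance, run.run_id, timeout=60)
    assert finished.status == PipelineRunStatus.SUCCESS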