Example 1
def test_run_priority_pipeline(rabbitmq):
    with tempfile.TemporaryDirectory() as tempdir:
        with instance_for_test(temp_dir=tempdir) as instance:
            low_done = threading.Event()
            hi_done = threading.Event()

            # enqueue low-priority tasks
            low_thread = threading.Thread(
                target=execute_on_thread,
                args=("low_pipeline", low_done, instance.get_ref()),
                kwargs={
                    "tempdir": tempdir,
                    "tags": {
                        DAGSTER_CELERY_RUN_PRIORITY_TAG: "-3"
                    }
                },
            )
            low_thread.daemon = True
            low_thread.start()

            time.sleep(1)  # sleep so that we don't hit any sqlite concurrency issues

            # enqueue hi-priority tasks
            hi_thread = threading.Thread(
                target=execute_on_thread,
                args=("hi_pipeline", hi_done, instance.get_ref()),
                kwargs={
                    "tempdir": tempdir,
                    "tags": {
                        DAGSTER_CELERY_RUN_PRIORITY_TAG: "3"
                    }
                },
            )
            hi_thread.daemon = True
            hi_thread.start()

            time.sleep(5)  # sleep to give queue time to prioritize tasks

            with start_celery_worker():
                while not low_done.is_set() or not hi_done.is_set():
                    time.sleep(1)

                low_runs = instance.get_runs(filters=RunsFilter(
                    pipeline_name="low_pipeline"))
                assert len(low_runs) == 1
                low_run = low_runs[0]
                lowstats = instance.get_run_stats(low_run.run_id)
                hi_runs = instance.get_runs(filters=RunsFilter(
                    pipeline_name="hi_pipeline"))
                assert len(hi_runs) == 1
                hi_run = hi_runs[0]
                histats = instance.get_run_stats(hi_run.run_id)

                assert lowstats.start_time < histats.start_time
                assert lowstats.end_time > histats.end_time
Example 2
def test_execute_schedule_on_celery_k8s(  # pylint: disable=redefined-outer-name, unused-argument
        dagster_instance_for_daemon, helm_namespace_for_daemon):
    schedule_name = "frequent_celery"
    with get_test_project_external_schedule(
            dagster_instance_for_daemon, schedule_name) as external_schedule:
        reoriginated_schedule = ReOriginatedExternalScheduleForTest(
            external_schedule)
        dagster_instance_for_daemon.start_schedule(reoriginated_schedule)

        scheduler_runs = dagster_instance_for_daemon.get_runs(
            RunsFilter(
                tags=PipelineRun.tags_for_schedule(reoriginated_schedule)))

        assert len(scheduler_runs) == 0

        try:

            start_time = time.time()

            while True:
                schedule_runs = dagster_instance_for_daemon.get_runs(
                    RunsFilter(tags=PipelineRun.tags_for_schedule(
                        reoriginated_schedule)))

                if len(schedule_runs) > 0:
                    break

                if time.time() - start_time > 120:
                    raise Exception(
                        "Timed out waiting for schedule to start a run. "
                        "Check the dagster-daemon pod logs to see why it didn't start."
                    )

                time.sleep(1)

        finally:
            dagster_instance_for_daemon.stop_schedule(
                reoriginated_schedule.get_external_origin_id(),
                reoriginated_schedule.selector_id,
                reoriginated_schedule,
            )

        last_run = schedule_runs[0]

        finished_pipeline_run = poll_for_finished_run(
            dagster_instance_for_daemon, last_run.run_id, timeout=120)

        assert finished_pipeline_run.is_success
Example 3
    def _get_queued_runs(self, instance):
        queued_runs_filter = RunsFilter(statuses=[PipelineRunStatus.QUEUED])

        # Reversed for fifo ordering
        # Note: should add a maximum fetch limit https://github.com/dagster-io/dagster/issues/3339
        runs = instance.get_runs(filters=queued_runs_filter)[::-1]
        return runs
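Only the filter construction and the reversal are shown above; the sketch below (not part of the original) illustrates how a run coordinator might consume such a helper against a DagsterInstance to dequeue runs oldest-first. The function name is hypothetical, and the exact module that exports RunsFilter and PipelineRunStatus varies across Dagster versions.

# Hypothetical sketch: pull queued runs in FIFO order, mirroring
# _get_queued_runs above. `instance` is assumed to be a DagsterInstance.
from dagster.core.storage.pipeline_run import PipelineRunStatus, RunsFilter


def iter_queued_run_ids(instance):
    # get_runs returns newest-first, so reverse for FIFO ordering
    queued_runs = instance.get_runs(
        filters=RunsFilter(statuses=[PipelineRunStatus.QUEUED]))[::-1]
    for run in queued_runs:
        yield run.run_id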
Example 4
def execute_monitoring_iteration(instance,
                                 workspace,
                                 logger,
                                 _debug_crash_flags=None):
    check.invariant(instance.run_launcher.supports_check_run_worker_health,
                    "Must use a supported run launcher")

    # TODO: consider limiting number of runs to fetch
    runs = instance.get_runs(filters=RunsFilter(
        statuses=IN_PROGRESS_RUN_STATUSES))

    logger.info(f"Collected {len(runs)} runs for monitoring")

    for run in runs:
        try:
            logger.info(f"Checking run {run.run_id}")

            if run.status == PipelineRunStatus.STARTING:
                monitor_starting_run(instance, run, logger)
            elif run.status == PipelineRunStatus.STARTED:
                monitor_started_run(instance, workspace, run, logger)
            elif run.status == PipelineRunStatus.CANCELING:
                # TODO: implement canceling timeouts
                pass
            else:
                check.invariant(False, f"Unexpected run status: {run.status}")
        except Exception:
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            logger.error(f"Hit error while monitoring run {run.run_id}: "
                         f"{str(error_info)}")
            yield error_info
        else:
            yield
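The function above is a generator that yields a serializable error for each run it failed to check and a bare None otherwise. A minimal consumption sketch, under the assumption that the caller only cares about the errors:

# Hypothetical sketch: drive one monitoring pass and keep only the errors.
# execute_monitoring_iteration yields None for runs checked without incident.
def run_monitoring_pass(instance, workspace, logger):
    return [
        error for error in execute_monitoring_iteration(instance, workspace, logger)
        if error is not None
    ]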
Example 5
def terminate_pipeline_execution(instance, run_id, terminate_policy):
    from ...schema.errors import GrapheneRunNotFoundError
    from ...schema.pipelines.pipeline import GrapheneRun
    from ...schema.roots.mutation import (
        GrapheneTerminateRunFailure,
        GrapheneTerminateRunPolicy,
        GrapheneTerminateRunSuccess,
    )

    check.inst_param(instance, "instance", DagsterInstance)
    check.str_param(run_id, "run_id")

    records = instance.get_run_records(RunsFilter(run_ids=[run_id]))

    force_mark_as_canceled = (
        terminate_policy == GrapheneTerminateRunPolicy.MARK_AS_CANCELED_IMMEDIATELY)

    if not records:
        return GrapheneRunNotFoundError(run_id)

    record = records[0]
    run = record.pipeline_run
    graphene_run = GrapheneRun(record)

    valid_status = not run.is_finished and (
        force_mark_as_canceled or (run.status == PipelineRunStatus.STARTED
                                   or run.status == PipelineRunStatus.QUEUED))

    if not valid_status:
        return GrapheneTerminateRunFailure(
            run=graphene_run,
            message="Run {run_id} could not be terminated due to having status {status}.".format(
                run_id=run.run_id, status=run.status.value),
        )

    if force_mark_as_canceled:
        try:
            if instance.run_coordinator and instance.run_coordinator.can_cancel_run(
                    run_id):
                instance.run_coordinator.cancel_run(run_id)
        except:
            instance.report_engine_event(
                "Exception while attempting to force-terminate run. Run will still be marked as canceled.",
                pipeline_name=run.pipeline_name,
                run_id=run.run_id,
                engine_event_data=EngineEventData(
                    error=serializable_error_info_from_exc_info(
                        sys.exc_info()), ),
            )
        return _force_mark_as_canceled(instance, run_id)

    if (instance.run_coordinator
            and instance.run_coordinator.can_cancel_run(run_id)
            and instance.run_coordinator.cancel_run(run_id)):
        return GrapheneTerminateRunSuccess(graphene_run)

    return GrapheneTerminateRunFailure(
        run=graphene_run,
        message="Unable to terminate run {run_id}".format(run_id=run.run_id))
Example 6
def get_run_groups(graphene_info, filters=None, cursor=None, limit=None):
    from ..schema.pipelines.pipeline import GrapheneRun
    from ..schema.runs import GrapheneRunGroup

    check.opt_inst_param(filters, "filters", RunsFilter)
    check.opt_str_param(cursor, "cursor")
    check.opt_int_param(limit, "limit")

    instance = graphene_info.context.instance
    run_groups = instance.get_run_groups(filters=filters,
                                         cursor=cursor,
                                         limit=limit)
    run_ids = {
        run.run_id
        for run_group in run_groups.values()
        for run in run_group.get("runs", [])
    }
    records_by_ids = {
        record.pipeline_run.run_id: record
        for record in instance.get_run_records(
            RunsFilter(run_ids=list(run_ids)))
    }

    for root_run_id in run_groups:
        run_groups[root_run_id]["runs"] = [
            GrapheneRun(records_by_ids.get(run.run_id))
            for run in run_groups[root_run_id]["runs"]
        ]

    return [
        GrapheneRunGroup(root_run_id=root_run_id, runs=run_group["runs"])
        for root_run_id, run_group in run_groups.items()
    ]
Example 7
def get_partition_set_partition_statuses(graphene_info, repository_handle,
                                         partition_set_name):
    from ..schema.partition_sets import GraphenePartitionStatus, GraphenePartitionStatuses

    check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
    check.str_param(partition_set_name, "partition_set_name")
    result = graphene_info.context.get_external_partition_names(
        repository_handle, partition_set_name)
    all_partition_set_runs = graphene_info.context.instance.get_runs(
        RunsFilter(tags={PARTITION_SET_TAG: partition_set_name}))
    runs_by_partition = {}
    for run in all_partition_set_runs:
        partition_name = run.tags.get(PARTITION_NAME_TAG)
        if not partition_name or partition_name in runs_by_partition:
            # all_partition_set_runs is in descending order by creation time, we should ignore
            # runs for the same partition if we've already considered the partition
            continue
        runs_by_partition[partition_name] = run

    return GraphenePartitionStatuses(results=[
        GraphenePartitionStatus(
            id=f"{partition_set_name}:{partition_name}",
            partitionName=partition_name,
            runStatus=runs_by_partition[partition_name].status
            if runs_by_partition.get(partition_name) else None,
        ) for partition_name in result.partition_names
    ])
Example 8
    def to_selector(self):
        if self.tags:
            # We are wrapping self.tags in a list because graphene.List is not marked as iterable
            tags = {tag["key"]: tag["value"] for tag in list(self.tags)}
        else:
            tags = None

        if self.statuses:
            statuses = [
                PipelineRunStatus[status] for status in self.statuses  # pylint: disable=not-an-iterable
            ]
        else:
            statuses = None

        updated_after = pendulum.from_timestamp(
            self.updatedAfter) if self.updatedAfter else None
        created_before = pendulum.from_timestamp(
            self.createdBefore) if self.createdBefore else None

        return RunsFilter(
            run_ids=self.runIds if self.runIds else None,
            pipeline_name=self.pipelineName,
            tags=tags,
            statuses=statuses,
            snapshot_id=self.snapshotId,
            updated_after=updated_after,
            mode=self.mode,
            created_before=created_before,
        )
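to_selector() maps the GraphQL input fields one-to-one onto RunsFilter constructor arguments. For reference, this is roughly the filter it would build for a hypothetical, fully filled-in input; the concrete values and the import path are illustrative assumptions, while the field names follow the constructor call above.

# Sketch of the RunsFilter equivalent to a filled-in GraphQL selector.
import pendulum
from dagster.core.storage.pipeline_run import PipelineRunStatus, RunsFilter

equivalent_filter = RunsFilter(
    run_ids=None,                                   # runIds omitted
    pipeline_name="my_job",                         # pipelineName
    tags={"team": "data-platform"},                 # key/value tag pairs
    statuses=[PipelineRunStatus.FAILURE],           # statuses, by enum name
    snapshot_id=None,                               # snapshotId omitted
    updated_after=pendulum.from_timestamp(1640995200),  # updatedAfter
    mode=None,
    created_before=None,                            # createdBefore omitted
)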
Example 9
def _get_partitions_chunk(instance, logger, backfill_job, chunk_size):
    check.inst_param(backfill_job, "backfill_job", PartitionBackfill)
    partition_names = backfill_job.partition_names
    checkpoint = backfill_job.last_submitted_partition_name

    if (backfill_job.last_submitted_partition_name
            and backfill_job.last_submitted_partition_name in partition_names):
        index = partition_names.index(
            backfill_job.last_submitted_partition_name)
        partition_names = partition_names[index + 1:]

    # for idempotence, fetch all runs with the current backfill id
    backfill_runs = instance.get_runs(
        RunsFilter(
            tags=PipelineRun.tags_for_backfill_id(backfill_job.backfill_id)))
    completed_partitions = set(
        [run.tags.get(PARTITION_NAME_TAG) for run in backfill_runs])
    initial_checkpoint = (partition_names.index(checkpoint) + 1 if checkpoint
                          and checkpoint in partition_names else 0)
    partition_names = partition_names[initial_checkpoint:]
    has_more = chunk_size < len(partition_names)
    partitions_chunk = partition_names[:chunk_size]
    next_checkpoint = partitions_chunk[-1]

    to_skip = set(partitions_chunk).intersection(completed_partitions)
    if to_skip:
        logger.info(
            f"Found {len(to_skip)} existing runs for backfill {backfill_job.backfill_id}, skipping"
        )
    to_submit = [
        partition_name for partition_name in partitions_chunk
        if partition_name not in completed_partitions
    ]
    return to_submit, next_checkpoint, has_more
Example 10
def _force_mark_as_canceled(instance, run_id):
    from ...schema.pipelines.pipeline import GrapheneRun
    from ...schema.roots.mutation import GrapheneTerminateRunSuccess

    reloaded_record = instance.get_run_records(RunsFilter(run_ids=[run_id]))[0]

    if not reloaded_record.pipeline_run.is_finished:
        message = (
            "This pipeline was forcibly marked as canceled from outside the execution context. The "
            "computational resources created by the run may not have been fully cleaned up."
        )
        instance.report_run_canceled(reloaded_record.pipeline_run,
                                     message=message)
        reloaded_record = instance.get_run_records(
            RunsFilter(run_ids=[run_id]))[0]

    return GrapheneTerminateRunSuccess(GrapheneRun(reloaded_record))
Example 11
def get_run_by_id(graphene_info, run_id):
    from ..schema.errors import GrapheneRunNotFoundError
    from ..schema.pipelines.pipeline import GrapheneRun

    instance = graphene_info.context.instance
    records = instance.get_run_records(RunsFilter(run_ids=[run_id]))
    if not records:
        return GrapheneRunNotFoundError(run_id)
    else:
        return GrapheneRun(records[0])
Example 12
    def resolve_runs(self, graphene_info, **kwargs):
        filters = kwargs.get("filter")
        partition_tags = {
            PARTITION_SET_TAG: self._external_partition_set.name,
            PARTITION_NAME_TAG: self._partition_name,
        }
        if filters is not None:
            filters = filters.to_selector()
            runs_filter = RunsFilter(
                run_ids=filters.run_ids,
                pipeline_name=filters.job_name,
                statuses=filters.statuses,
                tags=merge_dicts(filters.tags, partition_tags),
            )
        else:
            runs_filter = RunsFilter(tags=partition_tags)

        return get_runs(
            graphene_info, runs_filter, cursor=kwargs.get("cursor"), limit=kwargs.get("limit")
        )
Example 13
def _recent_failed_runs_text(instance):
    lines = []
    runs = instance.get_runs(
        limit=5,
        filters=RunsFilter(statuses=[PipelineRunStatus.FAILURE, PipelineRunStatus.CANCELED]),
    )
    if len(runs) <= 0:
        return ""
    for run in runs:
        lines.append("{:<50}{:<50}{:<20}".format(run.run_id, run.pipeline_name, run.status))
    return "Recently failed runs:\n{}".format("\n".join(lines))
Example 14
    def resolve_runs(self, graphene_info, **kwargs):
        from .pipelines.pipeline import GrapheneRun

        filters = RunsFilter.for_backfill(self._backfill_job.backfill_id)
        return [
            GrapheneRun(record)
            for record in graphene_info.context.instance.get_run_records(
                filters=filters,
                limit=kwargs.get("limit"),
            )
        ]
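Besides the plain constructor, these examples rely on RunsFilter classmethods that encapsulate the tag conventions for specific workflows: for_backfill here, for_sensor and for_schedule in Example 24, and for_partition in Example 27. A short recap with placeholder arguments (the import path may differ by Dagster version):

# Placeholder sketch of the classmethod constructors seen in these examples.
from dagster.core.storage.pipeline_run import RunsFilter

backfill_filter = RunsFilter.for_backfill("my_backfill_id")
# RunsFilter.for_sensor(instigator_state)                  (Example 24)
# RunsFilter.for_schedule(instigator_state)                (Example 24)
# RunsFilter.for_partition(partition_set_def, partition)   (Example 27)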
Example 15
def _fetch_runs_by_partition(instance, partition_set_def, status_filters=None):
    # query runs db for this partition set
    filters = RunsFilter(tags={"dagster/partition_set": partition_set_def.name})
    partition_set_runs = instance.get_runs(filters)

    runs_by_partition = defaultdict(list)

    for run in partition_set_runs:
        if not status_filters or run.status in status_filters:
            runs_by_partition[run.tags["dagster/partition"]].append(run)

    return runs_by_partition
Example 16
def get_latest_asset_run_by_step_key(graphene_info, job_names, asset_nodes):
    from ..schema.pipelines.pipeline import GrapheneLatestRun, GrapheneRun

    # This method returns the latest run that has occurred for a given step.
    # Because it is expensive to deserialize PipelineRun objects, we limit this
    # query to retrieving the last 5 runs per job. If none of a step's jobs have
    # any runs, the step maps to a GrapheneLatestRun with no run. If none of the
    # latest runs contain the step key, the step is omitted from the result.

    instance = graphene_info.context.instance

    latest_run_by_step: Dict[str, PipelineRun] = {}

    run_records_by_job = {
        job_name: instance.get_run_records(RunsFilter(pipeline_name=job_name),
                                           limit=5)
        for job_name in job_names
    }

    get_step_keys = lambda record: graphene_info.context.instance.get_execution_plan_snapshot(
        record.pipeline_run.execution_plan_snapshot_id).step_keys_to_execute

    # step_keys list is in same order as records list in get_step_keys
    record_step_keys_by_job = {
        job_name: [get_step_keys(record) for record in run_records]
        for job_name, run_records in run_records_by_job.items()
    }

    for asset_node in asset_nodes:
        asset_job_names = asset_node.job_names
        step_key = asset_node.op_name

        total_num_records = sum([
            len(run_records_by_job.get(job_name, []))
            for job_name in asset_job_names
        ])
        if total_num_records == 0:
            latest_run_by_step[step_key] = GrapheneLatestRun(step_key, None)

        latest_run = None
        for job_name in asset_job_names:
            record_step_keys = record_step_keys_by_job.get(job_name, [])
            for i in range(len(record_step_keys)):
                if step_key in record_step_keys[i]:
                    record = run_records_by_job.get(job_name)[i]
                    if latest_run is None or record.create_timestamp > latest_run.create_timestamp:
                        latest_run = record

        if latest_run:
            latest_run_by_step[step_key] = GrapheneLatestRun(
                step_key, GrapheneRun(latest_run))

    return latest_run_by_step
Example 17
def _fetch_existing_runs(instance, external_sensor, run_requests):
    run_keys = [
        run_request.run_key for run_request in run_requests
        if run_request.run_key
    ]

    if not run_keys:
        return {}

    existing_runs = {}

    if instance.supports_bucket_queries:
        runs = instance.get_runs(
            filters=RunsFilter(
                tags=PipelineRun.tags_for_sensor(external_sensor), ),
            bucket_by=TagBucket(
                tag_key=RUN_KEY_TAG,
                bucket_limit=1,
                tag_values=run_keys,
            ),
        )
        for run in runs:
            tags = run.tags or {}
            run_key = tags.get(RUN_KEY_TAG)
            existing_runs[run_key] = run
        return existing_runs

    else:
        for run_key in run_keys:
            runs = instance.get_runs(
                filters=RunsFilter(tags=merge_dicts(
                    PipelineRun.tags_for_sensor(external_sensor),
                    {RUN_KEY_TAG: run_key},
                )),
                limit=1,
            )
            if runs:
                existing_runs[run_key] = runs[0]
    return existing_runs
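A sensor daemon would typically use the mapping returned above to drop run requests whose run_key has already produced a run. The helper below is a hypothetical illustration of that idempotence check, not code from the original.

# Hypothetical: filter run requests down to those without an existing run.
def requests_to_submit(instance, external_sensor, run_requests):
    existing = _fetch_existing_runs(instance, external_sensor, run_requests)
    return [
        request for request in run_requests
        if not request.run_key or request.run_key not in existing
    ]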
Example 18
    def resolve_numRequested(self, graphene_info):
        filters = RunsFilter.for_backfill(self._backfill_job.backfill_id)
        run_count = graphene_info.context.instance.get_runs_count(filters)
        if self._backfill_job.status == BulkActionStatus.COMPLETED:
            return len(self._backfill_job.partition_names)

        checkpoint = self._backfill_job.last_submitted_partition_name
        return max(
            run_count,
            self._backfill_job.partition_names.index(checkpoint) + 1
            if checkpoint and checkpoint in self._backfill_job.partition_names
            else 0,
        )
Example 19
def tick_specific_data_from_dagster_tick(graphene_info, tick):
    from ..pipelines.pipeline import GrapheneRun

    if tick.status == TickStatus.SUCCESS:
        if tick.run_ids and graphene_info.context.instance.has_run(
                tick.run_ids[0]):
            record = graphene_info.context.instance.get_run_records(
                RunsFilter(run_ids=[tick.run_ids[0]]))[0]
            return GrapheneScheduleTickSuccessData(run=GrapheneRun(record))
        return GrapheneScheduleTickSuccessData(run=None)
    elif tick.status == TickStatus.FAILURE:
        error = tick.error
        return GrapheneScheduleTickFailureData(error=error)
Example 20
def get_in_progress_runs_by_step(graphene_info, job_names, step_keys):
    from ..schema.pipelines.pipeline import GrapheneInProgressRunsByStep, GrapheneRun

    instance = graphene_info.context.instance

    in_progress_records = []
    for job_name in job_names:
        in_progress_records.extend(
            instance.get_run_records(
                RunsFilter(pipeline_name=job_name, statuses=PENDING_STATUSES)))

    in_progress_runs_by_step = defaultdict(list)
    unstarted_runs_by_step = defaultdict(list)

    for record in in_progress_records:
        run = record.pipeline_run

        asset_names = graphene_info.context.instance.get_execution_plan_snapshot(
            run.execution_plan_snapshot_id).step_keys_to_execute

        if run.status in IN_PROGRESS_STATUSES:
            step_stats = graphene_info.context.instance.get_run_step_stats(
                run.run_id, step_keys)
            for step_stat in step_stats:
                if step_stat.status == StepEventStatus.IN_PROGRESS:
                    in_progress_runs_by_step[step_stat.step_key].append(
                        GrapheneRun(record))

            for step_key in asset_names:
                # step_stats only contains stats for steps that are in progress or complete
                is_unstarted = (len([
                    step_stat for step_stat in step_stats
                    if step_stat.step_key == step_key
                ]) == 0)
                if is_unstarted:
                    unstarted_runs_by_step[step_key].append(
                        GrapheneRun(record))
        else:
            # the run never began execution, all steps are unstarted
            for step_key in asset_names:
                unstarted_runs_by_step[step_key].append(GrapheneRun(record))

    all_step_keys = (in_progress_runs_by_step.keys()
                     | unstarted_runs_by_step.keys())
    return [
        GrapheneInProgressRunsByStep(
            key,
            unstarted_runs_by_step.get(key, []),
            in_progress_runs_by_step.get(key, []),
        ) for key in all_step_keys
    ]
Example 21
def _get_existing_run_for_request(instance, external_schedule, schedule_time, run_request):
    tags = merge_dicts(
        PipelineRun.tags_for_schedule(external_schedule),
        {
            SCHEDULED_EXECUTION_TIME_TAG: to_timezone(schedule_time, "UTC").isoformat(),
        },
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key
    runs_filter = RunsFilter(tags=tags)
    existing_runs = instance.get_runs(runs_filter)
    if not len(existing_runs):
        return None
    return existing_runs[0]
Example 22
def test_0_12_0_add_mode_column(hostname, conn_string):
    _reconstruct_from_file(
        hostname,
        conn_string,
        file_relative_path(
            __file__,
            "snapshot_0_11_16_pre_add_mode_column/postgres/pg_dump.txt"),
    )

    with tempfile.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"),
                  "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        # Ensure that you don't get a migration required exception if not trying to use the
        # migration-required column.
        assert len(instance.get_runs()) == 1

        @solid
        def basic():
            pass

        @pipeline
        def noop_pipeline():
            basic()

        # Ensure that you don't get a migration required exception when running a pipeline
        # pre-migration.
        result = execute_pipeline(noop_pipeline, instance=instance)
        assert result.success
        assert len(instance.get_runs()) == 2

        # Ensure that migration required exception throws, since you are trying to use the
        # migration-required column.
        with pytest.raises(
                DagsterInstanceSchemaOutdated,
                match=_migration_regex(current_revision="7cba9eeaaf1d"),
        ):
            instance.get_runs(filters=RunsFilter(mode="the_mode"))

        instance.upgrade()

        result = execute_pipeline(noop_pipeline, instance=instance)
        assert result.success
        assert len(instance.get_runs()) == 3
Example 23
def _launch_pipeline_execution(graphene_info,
                               execution_params,
                               is_reexecuted=False):
    from ...schema.pipelines.pipeline import GrapheneRun
    from ...schema.runs import GrapheneLaunchRunSuccess

    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.inst_param(execution_params, "execution_params", ExecutionParams)
    check.bool_param(is_reexecuted, "is_reexecuted")

    run = do_launch(graphene_info, execution_params, is_reexecuted)
    records = graphene_info.context.instance.get_run_records(
        RunsFilter(run_ids=[run.run_id]))

    return GrapheneLaunchRunSuccess(run=GrapheneRun(records[0]))
Example 24
    def resolve_runs(self, graphene_info, **kwargs):
        from .pipelines.pipeline import GrapheneRun

        if kwargs.get("limit") and self._batch_loader:
            limit = kwargs["limit"]
            records = (self._batch_loader.get_run_records_for_sensor(
                self._instigator_state.name, limit)
                       if self._instigator_state.instigator_type
                       == InstigatorType.SENSOR else
                       self._batch_loader.get_run_records_for_schedule(
                           self._instigator_state.name, limit))
            return [GrapheneRun(record) for record in records]

        if self._instigator_state.instigator_type == InstigatorType.SENSOR:
            filters = RunsFilter.for_sensor(self._instigator_state)
        else:
            filters = RunsFilter.for_schedule(self._instigator_state)
        return [
            GrapheneRun(record)
            for record in graphene_info.context.instance.get_run_records(
                filters=filters,
                limit=kwargs.get("limit"),
            )
        ]
Example 25
    def resolve_runs(self, graphene_info):
        from .pipelines.pipeline import GrapheneRun

        instance = graphene_info.context.instance
        run_ids = self._tick.origin_run_ids or self._tick.run_ids
        if not run_ids:
            return []

        records_by_id = {
            record.pipeline_run.run_id: record
            for record in instance.get_run_records(RunsFilter(run_ids=run_ids))
        }

        return [
            GrapheneRun(records_by_id[run_id]) for run_id in run_ids
            if run_id in records_by_id
        ]
Example 26
def _fetch_last_run(instance, external_partition_set, partition_name):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(external_partition_set, "external_partition_set",
                     ExternalPartitionSet)
    check.str_param(partition_name, "partition_name")

    runs = instance.get_runs(
        RunsFilter(
            pipeline_name=external_partition_set.pipeline_name,
            tags={
                PARTITION_SET_TAG: external_partition_set.name,
                PARTITION_NAME_TAG: partition_name,
            },
        ),
        limit=1,
    )

    return runs[0] if runs else None
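Usage sketch (the instance, partition set, and partition name are placeholders): because get_runs returns runs in descending order by creation time, limit=1 with these tags yields the most recent run for the partition, or None if it has never run.

last_run = _fetch_last_run(instance, external_partition_set, "2022-01-01")
if last_run is not None:
    print(last_run.run_id, last_run.status)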
Example 27
def last_empty_partition(context, partition_set_def):
    check.inst_param(context, "context", ScheduleEvaluationContext)
    partition_set_def = check.inst_param(partition_set_def,
                                         "partition_set_def",
                                         PartitionSetDefinition)

    partitions = partition_set_def.get_partitions(
        context.scheduled_execution_time)
    if not partitions:
        return None
    selected = None
    for partition in reversed(partitions):
        filters = RunsFilter.for_partition(partition_set_def, partition)
        matching = context.instance.get_runs(filters)
        if not any(run.status == PipelineRunStatus.SUCCESS
                   for run in matching):
            selected = partition
            break
    return selected
Example 28
    def resolve_stepStats(self, _graphene_info, limit):
        if self._solid.get_is_dynamic_mapped():
            return GrapheneSolidStepStatsUnavailableError(
                message="Step stats are not available for dynamically-mapped ops"
            )

        if self._solid.get_is_composite():
            return GrapheneSolidStepStatsUnavailableError(
                message="Step stats are not available for composite solids / subgraphs"
            )

        instance = _graphene_info.context.instance
        runs_filter = RunsFilter(pipeline_name=self._solid.get_pipeline_name())
        runs = instance.get_runs(runs_filter, limit=limit)
        nodes = []
        for run in runs:
            stats = instance.get_run_step_stats(run.run_id, [str(self.handleID)])
            if len(stats):
                nodes.append(GrapheneRunStepStats(stats[0]))
        return GrapheneSolidStepStatsConnection(nodes=nodes)
Example 29
def get_pipeline_run_observable(graphene_info, run_id, after=None):
    from ...schema.pipelines.pipeline import GrapheneRun
    from ...schema.pipelines.subscription import (
        GraphenePipelineRunLogsSubscriptionFailure,
        GraphenePipelineRunLogsSubscriptionSuccess,
    )
    from ..events import from_event_record

    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.str_param(run_id, "run_id")
    check.opt_int_param(after, "after")
    instance = graphene_info.context.instance
    records = instance.get_run_records(RunsFilter(run_ids=[run_id]))

    if not records:

        def _get_error_observable(observer):
            observer.on_next(
                GraphenePipelineRunLogsSubscriptionFailure(
                    missingRunId=run_id,
                    message="Could not load run with id {}".format(run_id)))

        return Observable.create(_get_error_observable)  # pylint: disable=E1101

    record = records[0]
    run = record.pipeline_run

    def _handle_events(payload):
        events, loading_past = payload
        return GraphenePipelineRunLogsSubscriptionSuccess(
            run=GrapheneRun(record),
            messages=[
                from_event_record(event, run.pipeline_name) for event in events
            ],
            hasMorePastEvents=loading_past,
        )

    # pylint: disable=E1101
    return Observable.create(
        PipelineRunObservableSubscribe(instance, run_id,
                                       after_cursor=after)).map(_handle_events)
Example 30
def poll_for_finished_run(instance, run_id=None, timeout=20, run_tags=None):
    total_time = 0
    interval = 0.01

    filters = RunsFilter(
        run_ids=[run_id] if run_id else None,
        tags=run_tags,
        statuses=[
            PipelineRunStatus.SUCCESS, PipelineRunStatus.FAILURE,
            PipelineRunStatus.CANCELED
        ],
    )

    while True:
        runs = instance.get_runs(filters, limit=1)
        if runs:
            return runs[0]
        else:
            time.sleep(interval)
            total_time += interval
            if total_time > timeout:
                raise Exception("Timed out")