コード例 #1
0
def execute_preview_command(
    sensor_name, since, last_run_key, cursor, cli_args, print_fn, instance=None
):
    """Evaluate a sensor once and print what the evaluation produced.

    Args:
        sensor_name: Name of the sensor to look up in the target repository.
        since: Forwarded unchanged to the sensor evaluation request.
        last_run_key: Forwarded unchanged to the sensor evaluation request.
        cursor: Forwarded unchanged to the sensor evaluation request.
        cli_args: Mapping of CLI options; used to resolve the repository
            location and read for the ``"repository"`` key.
        print_fn: Callable invoked with each message to display.
        instance: Optional instance to evaluate against. When provided it is
            used as-is and left open for the caller; when omitted,
            ``DagsterInstance.get()`` is opened for the duration of the call.

    Raises:
        click.UsageError: If a ``DagsterInvariantViolationError`` is raised
            while resolving or evaluating the sensor.
    """
    if instance is not None:
        # Previously this argument was shadowed by the ``with`` target and
        # never used. The caller owns the instance, so it is not closed here.
        _preview_sensor_on_instance(
            instance, sensor_name, since, last_run_key, cursor, cli_args, print_fn
        )
        return

    with DagsterInstance.get() as default_instance:
        _preview_sensor_on_instance(
            default_instance, sensor_name, since, last_run_key, cursor, cli_args, print_fn
        )


def _preview_sensor_on_instance(
    instance, sensor_name, since, last_run_key, cursor, cli_args, print_fn
):
    """Resolve the sensor, evaluate it against ``instance`` and print the result."""
    with get_repository_location_from_kwargs(cli_args) as repo_location:
        try:
            external_repo = get_external_repository_from_repo_location(
                repo_location, cli_args.get("repository")
            )
            check_repo_and_scheduler(external_repo, instance)
            external_sensor = external_repo.get_external_sensor(sensor_name)
            try:
                sensor_runtime_data = repo_location.get_external_sensor_execution_data(
                    instance,
                    external_repo.handle,
                    external_sensor.name,
                    since,
                    last_run_key,
                    cursor,
                )
            except Exception:  # pylint: disable=broad-except
                # Any evaluation failure is reported to the user rather than
                # propagated, so the command ends without a traceback.
                error_info = serializable_error_info_from_exc_info(sys.exc_info())
                print_fn(
                    "Failed to resolve sensor for {sensor_name} : {error_info}".format(
                        sensor_name=external_sensor.name,
                        error_info=error_info.to_string(),
                    )
                )
                return

            if not sensor_runtime_data.run_requests:
                if sensor_runtime_data.skip_message:
                    print_fn(
                        "Sensor returned false for {sensor_name}, skipping: {skip_message}".format(
                            sensor_name=external_sensor.name,
                            skip_message=sensor_runtime_data.skip_message,
                        )
                    )
                else:
                    print_fn(
                        "Sensor returned false for {sensor_name}, skipping".format(
                            sensor_name=external_sensor.name
                        )
                    )
            else:
                # One YAML document per run request, separated by newlines.
                print_fn(
                    "Sensor returning run requests for {num} run(s):\n\n{run_requests}".format(
                        num=len(sensor_runtime_data.run_requests),
                        run_requests="\n".join(
                            yaml.safe_dump(run_request.run_config, default_flow_style=False)
                            for run_request in sensor_runtime_data.run_requests
                        ),
                    )
                )

        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex) from ex
コード例 #2
0
def _logged_pipeline_launch_command(config, preset, mode, instance, kwargs):
    """Create a pipeline run from CLI arguments and launch it on ``instance``.

    Args:
        config: Value of ``--config`` (config file paths), possibly empty.
        preset: Name of a preset on the target pipeline, or a falsy value.
        mode: Mode passed through to run creation.
        instance: The ``DagsterInstance`` used to create and launch the run.
        kwargs: Remaining CLI options; read for ``'env'``, ``'repository'``
            and ``'pipeline'``, and passed to the tag / solid-selection helpers.

    Returns:
        Whatever ``instance.launch_run`` returns for the created run.

    Raises:
        click.UsageError: If a preset is combined with config files.
    """
    check.inst_param(instance, 'instance', DagsterInstance)

    # Reconcile ``--config`` with the ``--env`` value found in kwargs; empty
    # values are normalized to None before being handed to the helper.
    reconciled = canonicalize_backcompat_args(
        config or None,
        '--config',
        kwargs.get('env') or None,
        '--env',
        '0.9.0',
        stacklevel=2,  # this stacklevel can point the warning to this line
    )
    # Fall back to the default empty tuple when nothing was supplied.
    config_files = list(
        check.opt_tuple_param(reconciled or tuple(), 'env', default=(), of_type=str)
    )

    location = get_repository_location_from_kwargs(kwargs, instance)
    repo = get_external_repository_from_repo_location(
        location, kwargs.get('repository')
    )
    pipeline = get_external_pipeline_from_external_repo(
        repo, kwargs.get('pipeline')
    )

    log_external_repo_stats(
        instance=instance,
        external_pipeline=pipeline,
        external_repo=repo,
        source='pipeline_launch_command',
    )

    if preset and config_files:
        raise click.UsageError('Can not use --preset with --config.')
    resolved_preset = pipeline.get_preset(preset) if preset else None

    tags = get_tags_from_args(kwargs)
    selection = get_solid_selection_from_args(kwargs)
    run_config = get_run_config_from_env_file_list(config_files)

    created_run = _create_external_pipeline_run(
        instance=instance,
        repo_location=location,
        external_repo=repo,
        external_pipeline=pipeline,
        run_config=run_config,
        mode=mode,
        preset=resolved_preset,
        tags=tags,
        solid_selection=selection,
    )

    return instance.launch_run(created_run.run_id, pipeline)
コード例 #3
0
ファイル: api.py プロジェクト: xaniasd/dagster
def launch_scheduled_execution(output_file, schedule_name, **kwargs):
    """Run one tick of the named schedule, streaming output to ``output_file``.

    Args:
        output_file: Target handed to ``ipc_write_stream``.
        schedule_name: Name of the schedule to execute.
        **kwargs: CLI options used to resolve the repository origin and the
            repository location.
    """
    with ipc_write_stream(output_file) as stream, DagsterInstance.get() as instance:
        origin = get_repository_origin_from_kwargs(kwargs).get_job_origin(schedule_name)
        started_tick = JobTickData(
            job_origin_id=origin.get_id(),
            job_name=schedule_name,
            job_type=JobType.SCHEDULE,
            status=JobTickStatus.STARTED,
            timestamp=time.time(),
        )

        # open the tick scope before we load any external artifacts so that
        # load errors are stored in DB
        with _schedule_tick_state(instance, stream, started_tick) as tick_context:
            with get_repository_location_from_kwargs(kwargs) as location:
                repositories = location.get_repositories()
                check.invariant(
                    repositories and len(repositories) == 1,
                    "Passed in arguments should reference exactly one repository, instead there are {num_repos}".format(
                        num_repos=len(repositories)
                    ),
                )
                repo = next(iter(repositories.values()))

                known_schedule_names = [
                    candidate.name for candidate in repo.get_external_schedules()
                ]
                check.invariant(
                    schedule_name in known_schedule_names,
                    "Could not find schedule named {schedule_name}".format(
                        schedule_name=schedule_name
                    ),
                )

                schedule = repo.get_external_schedule(schedule_name)
                tick_context.update_with_status(status=JobTickStatus.STARTED)
                _launch_scheduled_execution(
                    instance, location, repo, schedule, tick_context, stream
                )
コード例 #4
0
ファイル: sensor.py プロジェクト: trevenrawr/dagster
def execute_cursor_command(sensor_name, cli_args, print_fn):
    """Set or clear the stored cursor of a sensor.

    Args:
        sensor_name: Name of the sensor whose cursor is changed.
        cli_args: Mapping of CLI options; read for ``"delete"``, ``"set"`` and
            ``"repository"`` and used to resolve the repository location.
        print_fn: Callable invoked with the confirmation message.

    Raises:
        click.UsageError: Unless exactly one of ``--set`` / ``--delete`` is
            given.
    """
    with DagsterInstance.get() as instance, get_repository_location_from_kwargs(
        instance, version=dagster_version, kwargs=cli_args
    ) as repo_location:
        wants_delete = bool(cli_args.get("delete"))
        wants_set = bool(cli_args.get("set"))
        if wants_delete == wants_set:
            # must use one of delete/set
            raise click.UsageError(
                "Must set cursor using `--set <value>` or use `--delete`"
            )

        cursor_value = cli_args.get("set")

        external_repo = get_external_repository_from_repo_location(
            repo_location, cli_args.get("repository")
        )
        check_repo_and_scheduler(external_repo, instance)
        external_sensor = external_repo.get_external_sensor(sensor_name)

        existing_state = instance.get_instigator_state(
            external_sensor.get_external_origin_id(), external_sensor.selector_id
        )
        if existing_state:
            # Carry over the tick bookkeeping; only the cursor (and the
            # sensor's current minimum interval) are replaced.
            previous_data = existing_state.instigator_data
            refreshed_data = SensorInstigatorData(
                last_tick_timestamp=previous_data.last_tick_timestamp,
                last_run_key=previous_data.last_run_key,
                min_interval=external_sensor.min_interval_seconds,
                cursor=cursor_value,
            )
            instance.update_instigator_state(existing_state.with_data(refreshed_data))
        else:
            # No stored state yet: record one in STOPPED status holding the cursor.
            instance.add_instigator_state(
                InstigatorState(
                    external_sensor.get_external_origin(),
                    InstigatorType.SENSOR,
                    InstigatorStatus.STOPPED,
                    SensorInstigatorData(
                        min_interval=external_sensor.min_interval_seconds,
                        cursor=cursor_value,
                    ),
                )
            )

        if not cursor_value:
            print_fn(f"Cleared cursor state for sensor {external_sensor.name}")
            return
        print_fn(
            f'Set cursor state for sensor {external_sensor.name} to "{cursor_value}"'
        )
コード例 #5
0
ファイル: api.py プロジェクト: rohithreddy/dagster
def launch_scheduled_execution(output_file, schedule_name, **kwargs):
    """Run one tick of the named schedule, streaming output to ``output_file``.

    Args:
        output_file: Target handed to ``ipc_write_stream``.
        schedule_name: Name of the schedule to execute.
        **kwargs: CLI options used to resolve the repository origin and the
            repository location.
    """
    with ipc_write_stream(output_file) as stream:
        instance = DagsterInstance.get()
        origin = get_repository_origin_from_kwargs(kwargs).get_schedule_origin(
            schedule_name
        )
        started_tick = ScheduleTickData(
            schedule_origin_id=origin.get_id(),
            schedule_name=schedule_name,
            timestamp=time.time(),
            cron_schedule=None,  # not yet loaded
            status=ScheduleTickStatus.STARTED,
        )

        # open the tick scope before we load any external artifacts so that
        # load errors are stored in DB
        with _schedule_tick_state(instance, stream, started_tick) as tick:
            location = get_repository_location_from_kwargs(kwargs, instance)
            repositories = location.get_repositories()
            check.invariant(
                repositories and len(repositories) == 1,
                'Passed in arguments should reference exactly one repository, instead there are {num_repos}'.format(
                    num_repos=len(repositories)
                ),
            )
            repo = next(iter(repositories.values()))

            known_schedule_names = [
                candidate.name for candidate in repo.get_external_schedules()
            ]
            check.invariant(
                schedule_name in known_schedule_names,
                'Could not find schedule named {schedule_name}'.format(
                    schedule_name=schedule_name
                ),
            )

            schedule = repo.get_external_schedule(schedule_name)
            # The cron schedule is only known once the schedule is loaded, so
            # the tick is updated with it here.
            tick.update_with_status(
                status=ScheduleTickStatus.STARTED,
                cron_schedule=schedule.cron_schedule,
            )
            _launch_scheduled_execution(
                instance, location, repo, schedule, tick, stream
            )
コード例 #6
0
ファイル: pipeline.py プロジェクト: substringgmbh/dagster
def execute_launch_command(instance, kwargs):
    """Create a pipeline run from CLI arguments and submit it on ``instance``.

    Args:
        instance: The ``DagsterInstance`` used to create and submit the run.
        kwargs: CLI options; read for ``"preset"``, ``"mode"``,
            ``"repository"``, ``"pipeline"`` and ``"run_id"``, and passed to
            the config / tag / solid-selection helpers.

    Returns:
        Whatever ``instance.submit_run`` returns for the created run.

    Raises:
        click.UsageError: If a preset is combined with explicit config.
    """
    preset_name = kwargs.get("preset")
    run_mode = kwargs.get("mode")
    check.inst_param(instance, "instance", DagsterInstance)
    run_config = get_config_from_args(kwargs)

    with get_repository_location_from_kwargs(kwargs) as location:
        repo = get_external_repository_from_repo_location(
            location, kwargs.get("repository")
        )
        pipeline = get_external_pipeline_from_external_repo(repo, kwargs.get("pipeline"))

        log_external_repo_stats(
            instance=instance,
            external_pipeline=pipeline,
            external_repo=repo,
            source="pipeline_launch_command",
        )

        # A preset supplies its own config, so the two cannot be combined.
        if preset_name and run_config:
            raise click.UsageError("Can not use --preset with -c / --config / --config-json.")

        tags = get_tags_from_args(kwargs)
        selection = get_solid_selection_from_args(kwargs)

        created_run = _create_external_pipeline_run(
            instance=instance,
            repo_location=location,
            external_repo=repo,
            external_pipeline=pipeline,
            run_config=run_config,
            mode=run_mode,
            preset=preset_name,
            tags=tags,
            solid_selection=selection,
            run_id=kwargs.get("run_id"),
        )

        return instance.submit_run(created_run.run_id, pipeline)
コード例 #7
0
ファイル: pipeline.py プロジェクト: mikaylaedwards/dagster
def execute_launch_command(instance, kwargs):
    """Create a pipeline run from CLI arguments and launch it on ``instance``.

    Args:
        instance: The ``DagsterInstance`` used to create and launch the run.
        kwargs: CLI options; read for ``"preset"``, ``"mode"``, ``"config"``,
            ``"repository"`` and ``"pipeline"``, and passed to the tag /
            solid-selection helpers.

    Returns:
        Whatever ``instance.launch_run`` returns for the created run.

    Raises:
        click.UsageError: If a preset is combined with config files.
    """
    preset_name = kwargs.get("preset")
    run_mode = kwargs.get("mode")
    check.inst_param(instance, "instance", DagsterInstance)
    config_files = list(
        check.opt_tuple_param(kwargs.get("config"), "config", default=(), of_type=str)
    )

    with get_repository_location_from_kwargs(kwargs, instance) as location:
        repo = get_external_repository_from_repo_location(
            location, kwargs.get("repository")
        )
        pipeline = get_external_pipeline_from_external_repo(repo, kwargs.get("pipeline"))

        log_external_repo_stats(
            instance=instance,
            external_pipeline=pipeline,
            external_repo=repo,
            source="pipeline_launch_command",
        )

        if preset_name and config_files:
            raise click.UsageError("Can not use --preset with --config.")

        tags = get_tags_from_args(kwargs)
        selection = get_solid_selection_from_args(kwargs)
        run_config = get_run_config_from_file_list(config_files)

        created_run = _create_external_pipeline_run(
            instance=instance,
            repo_location=location,
            external_repo=repo,
            external_pipeline=pipeline,
            run_config=run_config,
            mode=run_mode,
            preset=preset_name,
            tags=tags,
            solid_selection=selection,
        )

        return instance.launch_run(created_run.run_id, pipeline)
コード例 #8
0
ファイル: pipeline.py プロジェクト: drat/dagster
def execute_backfill_command(cli_args, print_fn, instance):
    """Resolve the repository location from ``cli_args`` and run the backfill there.

    The location is held open only for the duration of the delegated call.
    """
    location_scope = get_repository_location_from_kwargs(cli_args)
    with location_scope as location:
        _execute_backfill_command_at_location(
            cli_args, print_fn, instance, location
        )
コード例 #9
0
def launch_scheduled_execution(output_file, schedule_name,
                               override_system_timezone, **kwargs):
    """Run one tick of the named schedule, streaming output to ``output_file``.

    Args:
        output_file: Target handed to ``ipc_write_stream``.
        schedule_name: Name of the schedule to execute.
        override_system_timezone: When truthy, the call runs inside
            ``mock_system_timezone`` with this value.
        **kwargs: CLI options used to resolve the repository origin and the
            repository location.

    Raises:
        DagsterInvariantViolationError: If the schedule cannot be found, or if
            it declares an execution timezone different from the system one.
    """
    timezone_scope = (
        mock_system_timezone(override_system_timezone)
        if override_system_timezone
        else nullcontext()
    )
    with timezone_scope, ipc_write_stream(output_file) as stream, \
            DagsterInstance.get() as instance:
        origin = get_repository_origin_from_kwargs(kwargs).get_job_origin(schedule_name)
        started_tick = JobTickData(
            job_origin_id=origin.get_id(),
            job_name=schedule_name,
            job_type=JobType.SCHEDULE,
            status=JobTickStatus.STARTED,
            timestamp=time.time(),
        )

        # open the tick scope before we load any external artifacts so that
        # load errors are stored in DB
        with _schedule_tick_context(instance, stream, started_tick) as tick_context, \
                get_repository_location_from_kwargs(kwargs) as location:
            repositories = location.get_repositories()
            check.invariant(
                repositories and len(repositories) == 1,
                "Passed in arguments should reference exactly one repository, instead there are {num_repos}".format(
                    num_repos=len(repositories)
                ),
            )
            repo = next(iter(repositories.values()))

            known_schedule_names = [
                candidate.name for candidate in repo.get_external_schedules()
            ]
            if schedule_name not in known_schedule_names:
                raise DagsterInvariantViolationError(
                    "Could not find schedule named {schedule_name}".format(
                        schedule_name=schedule_name
                    ),
                )

            schedule = repo.get_external_schedule(schedule_name)

            # Validate that either the schedule has no timezone or it matches
            # the system timezone
            schedule_timezone = schedule.execution_timezone
            if schedule_timezone:
                system_timezone = pendulum.now().timezone.name
                if system_timezone != schedule.execution_timezone:
                    raise DagsterInvariantViolationError(
                        "Schedule {schedule_name} is set to execute in {schedule_timezone}, "
                        "but this scheduler can only run in the system timezone, "
                        "{system_timezone}. Use DagsterDaemonScheduler if you want to be able "
                        "to execute schedules in arbitrary timezones.".format(
                            schedule_name=schedule.name,
                            schedule_timezone=schedule_timezone,
                            system_timezone=system_timezone,
                        ),
                    )

            _launch_scheduled_executions(
                instance, location, repo, schedule, tick_context
            )
コード例 #10
0
def execute_backfill_command(cli_args, print_fn, instance=None):
    """Launch one pipeline run per selected partition of a partition set.

    Args:
        cli_args: Mapping of CLI options. Read directly for ``'repository'``,
            ``'pipeline'``, ``'noprompt'`` and ``'partition_set'``; also passed
            to the repository-location, partition-name and tag helpers.
        print_fn: Callable invoked with each progress/summary message.
        instance: Instance used to create and launch runs. Falls back to
            ``DagsterInstance.get()`` when falsy.

    Raises:
        click.UsageError: If the pipeline has no partition sets, if several
            exist but none was chosen while ``noprompt`` is set, or if the
            chosen name does not match any partition set of the pipeline.
        DagsterBackfillFailedError: If fetching partition names, or the run
            config or tags of a partition, returns an
            ``ExternalPartitionExecutionErrorData``.
    """
    instance = instance or DagsterInstance.get()
    repo_location = get_repository_location_from_kwargs(cli_args, instance)
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get('repository'))

    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        cli_args.get('pipeline'),
    )

    noprompt = cli_args.get('noprompt')

    # Partition sets of the repository that target this pipeline, keyed by name.
    pipeline_partition_set_names = {
        external_partition_set.name: external_partition_set
        for external_partition_set in
        external_repo.get_external_partition_sets()
        if external_partition_set.pipeline_name == external_pipeline.name
    }

    if not pipeline_partition_set_names:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(
                external_pipeline.name))
    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        # No explicit choice: use the only candidate, or ask the user unless
        # prompting is disabled.
        if len(pipeline_partition_set_names) == 1:
            partition_set_name = next(iter(
                pipeline_partition_set_names.keys()))
        elif noprompt:
            raise click.UsageError(
                'No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(
                    ', '.join(x for x in pipeline_partition_set_names.keys())))

    partition_set = pipeline_partition_set_names.get(partition_set_name)

    if not partition_set:
        raise click.UsageError(
            'No partition set found named `{}`'.format(partition_set_name))

    # Mode and solid selection for every run come from the partition set.
    mode = partition_set.mode
    solid_selection = partition_set.solid_selection

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location_handle=repo_location.location_handle,
    )

    # Resolve partitions to backfill
    partition_names_or_error = repo_location.get_external_partition_names(
        repo_handle,
        partition_set_name,
    )

    if isinstance(partition_names_or_error,
                  ExternalPartitionExecutionErrorData):
        raise DagsterBackfillFailedError(
            'Failure fetching partition names for {partition_set_name}: {error_message}'
            .format(
                partition_set_name=partition_set_name,
                error_message=partition_names_or_error.error.message,
            ),
            serialized_error_info=partition_names_or_error.error,
        )

    # Derive the partitions to run from the full name list and the CLI options.
    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args)

    # Print backfill info
    print_fn('\n     Pipeline: {}'.format(external_pipeline.name))
    print_fn('Partition set: {}'.format(partition_set_name))
    print_fn('   Partitions: {}\n'.format(
        print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
            'Do you want to proceed with the backfill ({} partitions)?'.format(
                len(partition_names))):

        print_fn('Launching runs... ')
        backfill_id = make_new_backfill_id()

        # Tags shared by every run: the backfill-id tags merged with the tags
        # given on the command line.
        run_tags = merge_dicts(
            PipelineRun.tags_for_backfill_id(backfill_id),
            get_tags_from_args(cli_args),
        )

        for partition_name in partition_names:
            # Config and tags are fetched per partition; a failure on any
            # partition aborts the loop, leaving earlier runs already launched.
            run_config_or_error = repo_location.get_external_partition_config(
                repo_handle, partition_set_name, partition_name)
            if isinstance(run_config_or_error,
                          ExternalPartitionExecutionErrorData):
                raise DagsterBackfillFailedError(
                    'Failure fetching run config for partition {partition_name} in {partition_set_name}: {error_message}'
                    .format(
                        partition_name=partition_name,
                        partition_set_name=partition_set_name,
                        error_message=run_config_or_error.error.message,
                    ),
                    serialized_error_info=run_config_or_error.error,
                )

            tags_or_error = repo_location.get_external_partition_tags(
                repo_handle, partition_set_name, partition_name)
            if isinstance(tags_or_error, ExternalPartitionExecutionErrorData):
                raise DagsterBackfillFailedError(
                    'Failure fetching tags for partition {partition_name} in {partition_set_name}: {error_message}'
                    .format(
                        partition_name=partition_name,
                        partition_set_name=partition_set_name,
                        error_message=tags_or_error.error.message,
                    ),
                    serialized_error_info=tags_or_error.error,
                )
            run = _create_external_pipeline_run(
                instance=instance,
                repo_location=repo_location,
                external_repo=external_repo,
                external_pipeline=external_pipeline,
                run_config=run_config_or_error.run_config,
                mode=mode,
                preset=None,
                tags=merge_dicts(tags_or_error.tags, run_tags),
                solid_selection=frozenset(solid_selection)
                if solid_selection else None,
            )

            instance.launch_run(run.run_id, external_pipeline)
            # Remove once we can handle synchronous execution... currently limited by sqlite
            time.sleep(0.1)

        print_fn('Launched backfill job `{}`'.format(backfill_id))
    else:
        print_fn('Aborted!')