예제 #1
0
def execute_preview_command(
    sensor_name, since, last_run_key, cursor, cli_args, print_fn, instance=None
):
    """Evaluate a sensor once and report the outcome through ``print_fn``.

    The sensor is looked up in the repository resolved from ``cli_args`` and
    evaluated via ``repo_location.get_external_sensor_execution_data``. This
    function only prints what the evaluation produced.

    Args:
        sensor_name: Name of the sensor to look up in the resolved repository.
        since: Forwarded unchanged to the sensor evaluation call.
        last_run_key: Forwarded unchanged to the sensor evaluation call.
        cursor: Forwarded unchanged to the sensor evaluation call.
        cli_args: Mapping of CLI options; used to resolve the repository
            location and read the optional ``"repository"`` entry.
        print_fn: Callable accepting a single string; receives all output.
        instance: Optional instance to use. When omitted, one is obtained from
            ``DagsterInstance.get()`` and released when this function exits.
            A caller-supplied instance is used as-is and is not closed here.

    Raises:
        click.UsageError: When a ``DagsterInvariantViolationError`` is raised
            while resolving the repository/sensor or evaluating it.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from contextlib import ExitStack

    with ExitStack() as stack:
        # Previously the `instance` argument was unconditionally shadowed by
        # DagsterInstance.get(); now it is only created (and owned) here when
        # the caller did not provide one.
        if instance is None:
            instance = stack.enter_context(DagsterInstance.get())
        repo_location = stack.enter_context(get_repository_location_from_kwargs(cli_args))

        try:
            external_repo = get_external_repository_from_repo_location(
                repo_location, cli_args.get("repository")
            )
            check_repo_and_scheduler(external_repo, instance)
            external_sensor = external_repo.get_external_sensor(sensor_name)

            try:
                sensor_runtime_data = repo_location.get_external_sensor_execution_data(
                    instance,
                    external_repo.handle,
                    external_sensor.name,
                    since,
                    last_run_key,
                    cursor,
                )
            except Exception:  # pylint: disable=broad-except
                # Any evaluation failure is reported to the user rather than
                # propagated; the command then ends without further output.
                error_info = serializable_error_info_from_exc_info(sys.exc_info())
                print_fn(
                    "Failed to resolve sensor for {sensor_name} : {error_info}".format(
                        sensor_name=external_sensor.name,
                        error_info=error_info.to_string(),
                    )
                )
                return

            if not sensor_runtime_data.run_requests:
                if sensor_runtime_data.skip_message:
                    print_fn(
                        "Sensor returned false for {sensor_name}, skipping: {skip_message}".format(
                            sensor_name=external_sensor.name,
                            skip_message=sensor_runtime_data.skip_message,
                        )
                    )
                else:
                    print_fn(
                        "Sensor returned false for {sensor_name}, skipping".format(
                            sensor_name=external_sensor.name
                        )
                    )
            else:
                # One YAML document per run request, showing its run config.
                print_fn(
                    "Sensor returning run requests for {num} run(s):\n\n{run_requests}".format(
                        num=len(sensor_runtime_data.run_requests),
                        run_requests="\n".join(
                            yaml.safe_dump(run_request.run_config, default_flow_style=False)
                            for run_request in sensor_runtime_data.run_requests
                        ),
                    )
                )

        except DagsterInvariantViolationError as ex:
            # click stores the message verbatim and returns it from __str__,
            # so it must be a string, not the exception object itself.
            raise click.UsageError(str(ex)) from ex
예제 #2
0
def _logged_pipeline_launch_command(config, preset, mode, instance, kwargs):
    """Create a pipeline run from CLI arguments and launch it.

    Args:
        config: Value supplied for ``--config``; reconciled with the legacy
            ``env`` entry of ``kwargs`` through ``canonicalize_backcompat_args``.
        preset: Optional preset name; resolved against the external pipeline.
        mode: Mode forwarded to run creation.
        instance: The ``DagsterInstance`` used to create and launch the run.
        kwargs: Mapping of the remaining CLI options.

    Returns:
        Whatever ``instance.launch_run`` returns for the created run.

    Raises:
        click.UsageError: When a preset is combined with config files.
    """
    check.inst_param(instance, 'instance', DagsterInstance)

    # Falsy values are normalised to None before the back-compat reconciliation.
    config_arg = config or None
    legacy_env_arg = kwargs.get('env') or None
    env = canonicalize_backcompat_args(
        config_arg,
        '--config',
        legacy_env_arg,
        '--env',
        '0.9.0',
        stacklevel=2,  # this stacklevel can point the warning to this call
    )
    if not env:
        env = tuple()  # back to default empty tuple
    env = list(check.opt_tuple_param(env, 'env', default=(), of_type=str))

    repo_location = get_repository_location_from_kwargs(kwargs, instance)
    external_repo = get_external_repository_from_repo_location(
        repo_location,
        kwargs.get('repository'),
    )
    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        kwargs.get('pipeline'),
    )

    log_external_repo_stats(
        instance=instance,
        external_pipeline=external_pipeline,
        external_repo=external_repo,
        source='pipeline_launch_command',
    )

    # A preset already carries its own config, so the two are mutually exclusive.
    if preset and env:
        raise click.UsageError('Can not use --preset with --config.')
    preset = external_pipeline.get_preset(preset) if preset else None

    run_tags = get_tags_from_args(kwargs)
    solid_selection = get_solid_selection_from_args(kwargs)
    run_config = get_run_config_from_env_file_list(env)

    pipeline_run = _create_external_pipeline_run(
        instance=instance,
        repo_location=repo_location,
        external_repo=external_repo,
        external_pipeline=external_pipeline,
        run_config=run_config,
        mode=mode,
        preset=preset,
        tags=run_tags,
        solid_selection=solid_selection,
    )

    return instance.launch_run(pipeline_run.run_id, external_pipeline)
예제 #3
0
def execute_launch_command(instance: DagsterInstance,
                           kwargs: Dict[str, str],
                           using_job_op_graph_apis: bool = False):
    """Create a run for the selected pipeline/job and submit it.

    Args:
        instance: The ``DagsterInstance`` used to create and submit the run.
        kwargs: Mapping of CLI options (``preset``, ``mode``, ``location``,
            ``repository``, ``pipeline_or_job``, ``run_id``, plus whatever the
            config/tag/selection helpers read).
        using_job_op_graph_apis: Forwarded to the pipeline-or-job lookup.

    Returns:
        Whatever ``instance.submit_run`` returns.

    Raises:
        click.UsageError: When both a preset and config are supplied.
    """

    def _optional_str(key):
        # Typing-only narrowing of a CLI option lookup.
        return cast(Optional[str], kwargs.get(key))

    preset = _optional_str("preset")
    mode = _optional_str("mode")
    check.inst_param(instance, "instance", DagsterInstance)
    config = get_config_from_args(kwargs)

    workspace_cm = get_workspace_from_kwargs(
        instance,
        version=dagster_version,
        kwargs=kwargs,
    )
    with workspace_cm as workspace:
        repo_location = get_repository_location_from_workspace(
            workspace,
            kwargs.get("location"),
        )
        external_repo = get_external_repository_from_repo_location(
            repo_location,
            _optional_str("repository"),
        )
        external_pipeline = get_external_pipeline_or_job_from_external_repo(
            external_repo,
            _optional_str("pipeline_or_job"),
            using_job_op_graph_apis,
        )

        log_external_repo_stats(
            instance=instance,
            external_pipeline=external_pipeline,
            external_repo=external_repo,
            source="pipeline_launch_command",
        )

        # Preset and explicit config are mutually exclusive on the CLI.
        if preset and config:
            raise click.UsageError(
                "Can not use --preset with -c / --config / --config-json.")

        run_tags = get_tags_from_args(kwargs)
        solid_selection = get_solid_selection_from_args(kwargs)
        requested_run_id = _optional_str("run_id")

        pipeline_run = _create_external_pipeline_run(
            instance=instance,
            repo_location=repo_location,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
            run_config=config,
            mode=mode,
            preset=preset,
            tags=run_tags,
            solid_selection=solid_selection,
            run_id=requested_run_id,
        )

        return instance.submit_run(pipeline_run.run_id, workspace)
예제 #4
0
def execute_cursor_command(sensor_name, cli_args, print_fn):
    """Set or clear the stored cursor of a sensor.

    Exactly one of the ``"set"`` / ``"delete"`` entries of ``cli_args`` must be
    truthy. The sensor's instigator state is created (as STOPPED) when none
    exists yet; otherwise the existing state is updated, carrying over its
    last tick timestamp and last run key.

    Args:
        sensor_name: Name of the sensor to look up in the resolved repository.
        cli_args: Mapping of CLI options.
        print_fn: Callable accepting a single string; receives the summary.

    Raises:
        click.UsageError: When neither or both of ``set`` / ``delete`` are given.
    """
    with DagsterInstance.get() as instance, get_repository_location_from_kwargs(
        instance, version=dagster_version, kwargs=cli_args
    ) as repo_location:
        wants_delete = bool(cli_args.get("delete"))
        wants_set = bool(cli_args.get("set"))
        if wants_delete == wants_set:
            # must use one of delete/set
            raise click.UsageError(
                "Must set cursor using `--set <value>` or use `--delete`")

        # None (or another falsy value) here means the cursor is being cleared.
        cursor_value = cli_args.get("set")

        external_repo = get_external_repository_from_repo_location(
            repo_location,
            cli_args.get("repository"),
        )
        check_repo_and_scheduler(external_repo, instance)
        external_sensor = external_repo.get_external_sensor(sensor_name)

        job_state = instance.get_instigator_state(
            external_sensor.get_external_origin_id(),
            external_sensor.selector_id,
        )

        if job_state:
            # Keep the tick bookkeeping of the existing state; only the cursor
            # (and the current min interval) is replaced.
            updated_data = SensorInstigatorData(
                last_tick_timestamp=job_state.instigator_data.last_tick_timestamp,
                last_run_key=job_state.instigator_data.last_run_key,
                min_interval=external_sensor.min_interval_seconds,
                cursor=cursor_value,
            )
            instance.update_instigator_state(job_state.with_data(updated_data))
        else:
            fresh_state = InstigatorState(
                external_sensor.get_external_origin(),
                InstigatorType.SENSOR,
                InstigatorStatus.STOPPED,
                SensorInstigatorData(
                    min_interval=external_sensor.min_interval_seconds,
                    cursor=cursor_value,
                ),
            )
            instance.add_instigator_state(fresh_state)

        if not cursor_value:
            print_fn(
                f"Cleared cursor state for sensor {external_sensor.name}")
        else:
            print_fn(
                f'Set cursor state for sensor {external_sensor.name} to "{cursor_value}"'
            )
예제 #5
0
def execute_launch_command(instance, kwargs):
    """Create a run for the selected pipeline and submit it.

    Args:
        instance: The ``DagsterInstance`` used to create and submit the run.
        kwargs: Mapping of CLI options (``preset``, ``mode``, ``repository``,
            ``pipeline``, ``run_id``, plus whatever the config/tag/selection
            helpers read).

    Returns:
        Whatever ``instance.submit_run`` returns.

    Raises:
        click.UsageError: When both a preset and config are supplied.
    """
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")
    check.inst_param(instance, "instance", DagsterInstance)
    config = get_config_from_args(kwargs)

    with get_repository_location_from_kwargs(kwargs) as repo_location:
        repository_name = kwargs.get("repository")
        external_repo = get_external_repository_from_repo_location(
            repo_location, repository_name
        )

        pipeline_name = kwargs.get("pipeline")
        external_pipeline = get_external_pipeline_from_external_repo(
            external_repo, pipeline_name
        )

        log_external_repo_stats(
            instance=instance,
            external_pipeline=external_pipeline,
            external_repo=external_repo,
            source="pipeline_launch_command",
        )

        # Preset and explicit config are mutually exclusive on the CLI.
        if preset and config:
            raise click.UsageError("Can not use --preset with -c / --config / --config-json.")

        run_tags = get_tags_from_args(kwargs)
        solid_selection = get_solid_selection_from_args(kwargs)
        requested_run_id = kwargs.get("run_id")

        pipeline_run = _create_external_pipeline_run(
            instance=instance,
            repo_location=repo_location,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
            run_config=config,
            mode=mode,
            preset=preset,
            tags=run_tags,
            solid_selection=solid_selection,
            run_id=requested_run_id,
        )

        return instance.submit_run(pipeline_run.run_id, external_pipeline)
예제 #6
0
def execute_launch_command(instance, kwargs):
    """Create a run for the selected pipeline and launch it.

    Args:
        instance: The ``DagsterInstance`` used to create and launch the run.
        kwargs: Mapping of CLI options (``preset``, ``mode``, ``config``,
            ``repository``, ``pipeline``, plus whatever the tag/selection
            helpers read). ``config`` is validated as an optional tuple of
            strings and handed to ``get_run_config_from_file_list``.

    Returns:
        Whatever ``instance.launch_run`` returns.

    Raises:
        click.UsageError: When both a preset and config entries are supplied.
    """
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")
    check.inst_param(instance, "instance", DagsterInstance)

    config_tuple = check.opt_tuple_param(
        kwargs.get("config"), "config", default=(), of_type=str
    )
    config = list(config_tuple)

    with get_repository_location_from_kwargs(kwargs, instance) as repo_location:
        repository_name = kwargs.get("repository")
        external_repo = get_external_repository_from_repo_location(
            repo_location, repository_name
        )

        pipeline_name = kwargs.get("pipeline")
        external_pipeline = get_external_pipeline_from_external_repo(
            external_repo, pipeline_name
        )

        log_external_repo_stats(
            instance=instance,
            external_pipeline=external_pipeline,
            external_repo=external_repo,
            source="pipeline_launch_command",
        )

        # Preset and explicit config are mutually exclusive on the CLI.
        if preset and config:
            raise click.UsageError("Can not use --preset with --config.")

        run_tags = get_tags_from_args(kwargs)
        solid_selection = get_solid_selection_from_args(kwargs)
        run_config = get_run_config_from_file_list(config)

        pipeline_run = _create_external_pipeline_run(
            instance=instance,
            repo_location=repo_location,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
            run_config=run_config,
            mode=mode,
            preset=preset,
            tags=run_tags,
            solid_selection=solid_selection,
        )

        return instance.launch_run(pipeline_run.run_id, external_pipeline)
예제 #7
0
def _execute_backfill_command_at_location(cli_args, print_fn, instance, workspace, repo_location):
    """Run the interactive backfill flow against one repository location.

    Resolves the pipeline and its partition set from ``cli_args``, prints a
    summary, asks for confirmation (unless ``noprompt`` is set), then creates
    and submits one run per partition and records the backfill on ``instance``.

    Args:
        cli_args: Mapping of CLI options (``repository``, ``pipeline``,
            ``noprompt``, ``partition_set``, plus whatever the tag/partition
            helpers read).
        print_fn: Callable accepting a single string; receives all output.
        instance: Instance used to submit runs and store the backfill record.
        workspace: Passed to ``instance.submit_run`` for every created run.
        repo_location: Repository location that serves the partition data.

    Returns:
        ``None``, except when fetching the partition execution data fails, in
        which case the return value of ``print_fn`` is returned.

    Raises:
        click.UsageError: When no usable partition set can be determined.
        DagsterBackfillFailedError: When fetching partition names fails.
    """
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get("repository")
    )
    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        cli_args.get("pipeline"),
    )
    noprompt = cli_args.get("noprompt")

    # Index the partition sets that target this pipeline by their name.
    candidates = {}
    for candidate in external_repo.get_external_partition_sets():
        if candidate.pipeline_name == external_pipeline.name:
            candidates[candidate.name] = candidate

    if not candidates:
        raise click.UsageError(
            "No partition sets found for pipeline `{}`".format(external_pipeline.name)
        )

    partition_set_name = cli_args.get("partition_set")
    if not partition_set_name:
        if len(candidates) == 1:
            # Only one choice available, so no need to ask.
            partition_set_name = next(iter(candidates))
        elif noprompt:
            raise click.UsageError("No partition set specified (see option `--partition-set`)")
        else:
            partition_set_name = click.prompt(
                "Select a partition set to use for backfill: {}".format(", ".join(candidates))
            )

    partition_set = candidates.get(partition_set_name)
    if not partition_set:
        raise click.UsageError("No partition set found named `{}`".format(partition_set_name))

    run_tags = get_tags_from_args(cli_args)

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location=repo_location,
    )

    try:
        partition_names_or_error = repo_location.get_external_partition_names(
            repo_handle,
            partition_set_name,
        )
    except Exception:  # pylint: disable=broad-except
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        failure_message = (
            "Failure fetching partition names for {partition_set_name}: {error_message}".format(
                partition_set_name=partition_set_name,
                error_message=error_info.message,
            )
        )
        raise DagsterBackfillFailedError(failure_message, serialized_error_info=error_info)

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args
    )

    # Print backfill info
    formatted_partitions = print_partition_format(partition_names, indent_level=15)
    print_fn("\n     Pipeline: {}".format(external_pipeline.name))
    print_fn("Partition set: {}".format(partition_set_name))
    print_fn("   Partitions: {}\n".format(formatted_partitions))

    # Confirm and launch
    confirmed = noprompt or click.confirm(
        "Do you want to proceed with the backfill ({} partitions)?".format(len(partition_names))
    )
    if not confirmed:
        print_fn("Aborted!")
        return

    print_fn("Launching runs... ")

    backfill_id = make_new_backfill_id()
    backfill_job = PartitionBackfill(
        backfill_id=backfill_id,
        partition_set_origin=partition_set.get_external_origin(),
        status=BulkActionStatus.REQUESTED,
        partition_names=partition_names,
        from_failure=False,
        reexecution_steps=None,
        tags=run_tags,
        backfill_timestamp=pendulum.now("UTC").timestamp(),
    )

    try:
        partition_execution_data = repo_location.get_external_partition_set_execution_param_data(
            repository_handle=repo_handle,
            partition_set_name=partition_set_name,
            partition_names=partition_names,
        )
    except Exception:  # pylint: disable=broad-except
        # Record the backfill as failed, with the captured error, before reporting it.
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        failed_job = backfill_job.with_status(BulkActionStatus.FAILED).with_error(error_info)
        instance.add_backfill(failed_job)
        return print_fn("Backfill failed: {}".format(error_info))

    assert isinstance(partition_execution_data, ExternalPartitionSetExecutionParamData)

    for partition_data in partition_execution_data.partition_data:
        pipeline_run = create_backfill_run(
            instance,
            repo_location,
            external_pipeline,
            partition_set,
            backfill_job,
            partition_data,
        )
        # create_backfill_run may yield a falsy value; such partitions are skipped.
        if not pipeline_run:
            continue
        instance.submit_run(pipeline_run.run_id, workspace)

    instance.add_backfill(backfill_job.with_status(BulkActionStatus.COMPLETED))

    print_fn("Launched backfill job `{}`".format(backfill_id))
예제 #8
0
def _execute_backfill_command_at_location(cli_args, print_fn, instance, repo_location):
    """Run the interactive backfill flow against one repository location.

    Resolves the pipeline and its partition set from ``cli_args``, prints a
    summary, asks for confirmation (unless ``noprompt`` is set), then creates
    and launches one run per partition, tagged with a new backfill id.

    Args:
        cli_args: Mapping of CLI options (``repository``, ``pipeline``,
            ``noprompt``, ``partition_set``, plus whatever the tag/partition
            helpers read).
        print_fn: Callable accepting a single string; receives all output.
        instance: Instance used to create and launch the runs.
        repo_location: Repository location that serves the partition data.

    Returns:
        ``None``, except when the partition execution data comes back as an
        error, in which case the return value of ``print_fn`` is returned.

    Raises:
        click.UsageError: When no usable partition set can be determined.
        DagsterBackfillFailedError: When partition names come back as an error.
    """
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get("repository")
    )
    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo, cli_args.get("pipeline")
    )
    noprompt = cli_args.get("noprompt")

    # Index the partition sets that target this pipeline by their name.
    candidates = {}
    for candidate in external_repo.get_external_partition_sets():
        if candidate.pipeline_name == external_pipeline.name:
            candidates[candidate.name] = candidate

    if not candidates:
        raise click.UsageError(
            "No partition sets found for pipeline `{}`".format(external_pipeline.name)
        )

    partition_set_name = cli_args.get("partition_set")
    if not partition_set_name:
        if len(candidates) == 1:
            # Only one choice available, so no need to ask.
            partition_set_name = next(iter(candidates))
        elif noprompt:
            raise click.UsageError("No partition set specified (see option `--partition-set`)")
        else:
            partition_set_name = click.prompt(
                "Select a partition set to use for backfill: {}".format(", ".join(candidates))
            )

    partition_set = candidates.get(partition_set_name)
    if not partition_set:
        raise click.UsageError("No partition set found named `{}`".format(partition_set_name))

    mode = partition_set.mode
    solid_selection = partition_set.solid_selection
    run_tags = get_tags_from_args(cli_args)

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location_handle=repo_location.location_handle,
    )

    # Resolve partitions to backfill
    partition_names_or_error = repo_location.get_external_partition_names(
        repo_handle, partition_set_name
    )
    if isinstance(partition_names_or_error, ExternalPartitionExecutionErrorData):
        names_error = partition_names_or_error.error
        failure_message = (
            "Failure fetching partition names for {partition_set_name}: {error_message}".format(
                partition_set_name=partition_set_name,
                error_message=names_error.message,
            )
        )
        raise DagsterBackfillFailedError(failure_message, serialized_error_info=names_error)

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args
    )

    # Print backfill info
    formatted_partitions = print_partition_format(partition_names, indent_level=15)
    print_fn("\n     Pipeline: {}".format(external_pipeline.name))
    print_fn("Partition set: {}".format(partition_set_name))
    print_fn("   Partitions: {}\n".format(formatted_partitions))

    # Confirm and launch
    confirmed = noprompt or click.confirm(
        "Do you want to proceed with the backfill ({} partitions)?".format(len(partition_names))
    )
    if not confirmed:
        print_fn("Aborted!")
        return

    print_fn("Launching runs... ")

    backfill_id = make_new_backfill_id()
    backfill_tags = PipelineRun.tags_for_backfill_id(backfill_id)
    partition_execution_data = repo_location.get_external_partition_set_execution_param_data(
        repository_handle=repo_handle,
        partition_set_name=partition_set_name,
        partition_names=partition_names,
    )

    if isinstance(partition_execution_data, ExternalPartitionExecutionErrorData):
        return print_fn("Backfill failed: {}".format(partition_execution_data.error))

    assert isinstance(partition_execution_data, ExternalPartitionSetExecutionParamData)

    for partition_data in partition_execution_data.partition_data:
        # Merge order: partition tags first, then backfill tags, then CLI tags.
        partition_and_backfill_tags = merge_dicts(partition_data.tags, backfill_tags)
        selection = frozenset(solid_selection) if solid_selection else None

        run = _create_external_pipeline_run(
            instance=instance,
            repo_location=repo_location,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
            run_config=partition_data.run_config,
            mode=mode,
            preset=None,
            tags=merge_dicts(partition_and_backfill_tags, run_tags),
            solid_selection=selection,
        )
        instance.launch_run(run.run_id, external_pipeline)

    print_fn("Launched backfill job `{}`".format(backfill_id))
예제 #9
0
def execute_backfill_command(cli_args, print_fn, instance=None):
    """Run the interactive backfill flow described by ``cli_args``.

    Resolves the pipeline and its partition set, prints a summary, asks for
    confirmation (unless ``noprompt`` is set), then fetches run config and tags
    for each partition, creates a run tagged with a new backfill id, and
    launches it.

    Args:
        cli_args: Mapping of CLI options (``repository``, ``pipeline``,
            ``noprompt``, ``partition_set``, plus whatever the tag/partition
            helpers read).
        print_fn: Callable accepting a single string; receives all output.
        instance: Optional instance; when falsy, ``DagsterInstance.get()`` is
            used instead.

    Raises:
        click.UsageError: When no usable partition set can be determined.
        DagsterBackfillFailedError: When partition names, run config, or tags
            come back as an error.
    """
    if not instance:
        instance = DagsterInstance.get()

    repo_location = get_repository_location_from_kwargs(cli_args, instance)
    external_repo = get_external_repository_from_repo_location(
        repo_location,
        cli_args.get('repository'),
    )
    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        cli_args.get('pipeline'),
    )
    noprompt = cli_args.get('noprompt')

    # Index the partition sets that target this pipeline by their name.
    candidates = {}
    for candidate in external_repo.get_external_partition_sets():
        if candidate.pipeline_name == external_pipeline.name:
            candidates[candidate.name] = candidate

    if not candidates:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(external_pipeline.name)
        )

    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        if len(candidates) == 1:
            # Only one choice available, so no need to ask.
            partition_set_name = next(iter(candidates))
        elif noprompt:
            raise click.UsageError(
                'No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(', '.join(candidates))
            )

    partition_set = candidates.get(partition_set_name)
    if not partition_set:
        raise click.UsageError(
            'No partition set found named `{}`'.format(partition_set_name))

    mode = partition_set.mode
    solid_selection = partition_set.solid_selection

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location_handle=repo_location.location_handle,
    )

    # Resolve partitions to backfill
    partition_names_or_error = repo_location.get_external_partition_names(
        repo_handle,
        partition_set_name,
    )
    if isinstance(partition_names_or_error, ExternalPartitionExecutionErrorData):
        names_error = partition_names_or_error.error
        raise DagsterBackfillFailedError(
            'Failure fetching partition names for {partition_set_name}: {error_message}'
            .format(
                partition_set_name=partition_set_name,
                error_message=names_error.message,
            ),
            serialized_error_info=names_error,
        )

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args)

    # Print backfill info
    formatted_partitions = print_partition_format(partition_names, indent_level=15)
    print_fn('\n     Pipeline: {}'.format(external_pipeline.name))
    print_fn('Partition set: {}'.format(partition_set_name))
    print_fn('   Partitions: {}\n'.format(formatted_partitions))

    # Confirm and launch
    confirmed = noprompt or click.confirm(
        'Do you want to proceed with the backfill ({} partitions)?'.format(
            len(partition_names)))
    if not confirmed:
        print_fn('Aborted!')
        return

    print_fn('Launching runs... ')
    backfill_id = make_new_backfill_id()

    # Backfill-id tags first, then tags from the CLI arguments.
    run_tags = merge_dicts(
        PipelineRun.tags_for_backfill_id(backfill_id),
        get_tags_from_args(cli_args),
    )

    for partition_name in partition_names:
        run_config_or_error = repo_location.get_external_partition_config(
            repo_handle, partition_set_name, partition_name)
        if isinstance(run_config_or_error, ExternalPartitionExecutionErrorData):
            config_error = run_config_or_error.error
            raise DagsterBackfillFailedError(
                'Failure fetching run config for partition {partition_name} in {partition_set_name}: {error_message}'
                .format(
                    partition_name=partition_name,
                    partition_set_name=partition_set_name,
                    error_message=config_error.message,
                ),
                serialized_error_info=config_error,
            )

        tags_or_error = repo_location.get_external_partition_tags(
            repo_handle, partition_set_name, partition_name)
        if isinstance(tags_or_error, ExternalPartitionExecutionErrorData):
            tags_error = tags_or_error.error
            raise DagsterBackfillFailedError(
                'Failure fetching tags for partition {partition_name} in {partition_set_name}: {error_message}'
                .format(
                    partition_name=partition_name,
                    partition_set_name=partition_set_name,
                    error_message=tags_error.message,
                ),
                serialized_error_info=tags_error,
            )

        selection = frozenset(solid_selection) if solid_selection else None
        run = _create_external_pipeline_run(
            instance=instance,
            repo_location=repo_location,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
            run_config=run_config_or_error.run_config,
            mode=mode,
            preset=None,
            tags=merge_dicts(tags_or_error.tags, run_tags),
            solid_selection=selection,
        )

        instance.launch_run(run.run_id, external_pipeline)
        # Remove once we can handle synchronous execution... currently limited by sqlite
        time.sleep(0.1)

    print_fn('Launched backfill job `{}`'.format(backfill_id))