def execute_preview_command(
    sensor_name, since, last_run_key, cursor, cli_args, print_fn, instance=None
):
    with DagsterInstance.get() as instance:
        with get_repository_location_from_kwargs(cli_args) as repo_location:
            try:
                external_repo = get_external_repository_from_repo_location(
                    repo_location, cli_args.get("repository")
                )
                check_repo_and_scheduler(external_repo, instance)
                external_sensor = external_repo.get_external_sensor(sensor_name)
                try:
                    sensor_runtime_data = repo_location.get_external_sensor_execution_data(
                        instance,
                        external_repo.handle,
                        external_sensor.name,
                        since,
                        last_run_key,
                        cursor,
                    )
                except Exception:  # pylint: disable=broad-except
                    error_info = serializable_error_info_from_exc_info(sys.exc_info())
                    print_fn(
                        "Failed to resolve sensor for {sensor_name} : {error_info}".format(
                            sensor_name=external_sensor.name,
                            error_info=error_info.to_string(),
                        )
                    )
                    return

                if not sensor_runtime_data.run_requests:
                    if sensor_runtime_data.skip_message:
                        print_fn(
                            "Sensor returned false for {sensor_name}, skipping: {skip_message}".format(
                                sensor_name=external_sensor.name,
                                skip_message=sensor_runtime_data.skip_message,
                            )
                        )
                    else:
                        print_fn(
                            "Sensor returned false for {sensor_name}, skipping".format(
                                sensor_name=external_sensor.name
                            )
                        )
                else:
                    print_fn(
                        "Sensor returning run requests for {num} run(s):\n\n{run_requests}".format(
                            num=len(sensor_runtime_data.run_requests),
                            run_requests="\n".join(
                                yaml.safe_dump(run_request.run_config, default_flow_style=False)
                                for run_request in sensor_runtime_data.run_requests
                            ),
                        )
                    )
            except DagsterInvariantViolationError as ex:
                raise click.UsageError(ex)

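
def _example_preview_usage():
    # Hedged usage sketch, not from the original source: previewing a sensor
    # against a workspace target. The cli_args keys below mirror the standard
    # dagster CLI target options ("python_file", "attribute"); the file and
    # sensor names are assumptions for illustration only.
    execute_preview_command(
        sensor_name="my_sensor",  # hypothetical sensor name
        since=None,
        last_run_key=None,
        cursor=None,
        cli_args={"python_file": "repo.py", "attribute": "my_repo"},
        print_fn=print,
    )
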
def _logged_pipeline_launch_command(config, preset, mode, instance, kwargs):
    check.inst_param(instance, 'instance', DagsterInstance)
    env = (
        canonicalize_backcompat_args(
            (config if config else None),
            '--config',
            (kwargs.get('env') if kwargs.get('env') else None),
            '--env',
            '0.9.0',
            stacklevel=2,  # this stacklevel can point the warning to this line
        )
        or tuple()  # back to default empty tuple
    )

    env = list(check.opt_tuple_param(env, 'env', default=(), of_type=str))

    repo_location = get_repository_location_from_kwargs(kwargs, instance)
    external_repo = get_external_repository_from_repo_location(
        repo_location, kwargs.get('repository')
    )
    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        kwargs.get('pipeline'),
    )

    log_external_repo_stats(
        instance=instance,
        external_pipeline=external_pipeline,
        external_repo=external_repo,
        source='pipeline_launch_command',
    )

    if preset:
        if env:
            raise click.UsageError('Can not use --preset with --config.')
        preset = external_pipeline.get_preset(preset)
    else:
        preset = None

    run_tags = get_tags_from_args(kwargs)

    solid_selection = get_solid_selection_from_args(kwargs)

    pipeline_run = _create_external_pipeline_run(
        instance=instance,
        repo_location=repo_location,
        external_repo=external_repo,
        external_pipeline=external_pipeline,
        run_config=get_run_config_from_env_file_list(env),
        mode=mode,
        preset=preset,
        tags=run_tags,
        solid_selection=solid_selection,
    )

    return instance.launch_run(pipeline_run.run_id, external_pipeline)

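
def _example_env_to_config_backcompat():
    # Hedged sketch, not from the original source: a simplified stand-in for
    # canonicalize_backcompat_args as used above. The real helper lives in
    # dagster's backcompat utilities and formats its warning differently; this
    # only illustrates the assumed resolution order (--config wins, a lone
    # --env warns and is still honored, both together is an error).
    import warnings

    def resolve(new_val, new_arg, old_val, old_arg, breaking_version):
        if new_val is not None and old_val is not None:
            raise ValueError(
                'Do not use both {new} and the deprecated {old}.'.format(new=new_arg, old=old_arg)
            )
        if old_val is not None:
            warnings.warn(
                '{old} is deprecated and will be removed in {ver}; use {new} instead.'.format(
                    old=old_arg, ver=breaking_version, new=new_arg
                )
            )
            return old_val
        return new_val

    assert resolve(('a.yaml',), '--config', None, '--env', '0.9.0') == ('a.yaml',)
    assert resolve(None, '--config', ('b.yaml',), '--env', '0.9.0') == ('b.yaml',)
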
def launch_scheduled_execution(output_file, schedule_name, **kwargs):
    with ipc_write_stream(output_file) as stream:
        with DagsterInstance.get() as instance:
            repository_origin = get_repository_origin_from_kwargs(kwargs)
            job_origin = repository_origin.get_job_origin(schedule_name)

            # open the tick scope before we load any external artifacts so that
            # load errors are stored in DB
            with _schedule_tick_state(
                instance,
                stream,
                JobTickData(
                    job_origin_id=job_origin.get_id(),
                    job_name=schedule_name,
                    job_type=JobType.SCHEDULE,
                    status=JobTickStatus.STARTED,
                    timestamp=time.time(),
                ),
            ) as tick_context:
                with get_repository_location_from_kwargs(kwargs) as repo_location:
                    repo_dict = repo_location.get_repositories()
                    check.invariant(
                        repo_dict and len(repo_dict) == 1,
                        "Passed in arguments should reference exactly one repository, instead there are {num_repos}".format(
                            num_repos=len(repo_dict)
                        ),
                    )
                    external_repo = next(iter(repo_dict.values()))
                    check.invariant(
                        schedule_name
                        in [schedule.name for schedule in external_repo.get_external_schedules()],
                        "Could not find schedule named {schedule_name}".format(
                            schedule_name=schedule_name
                        ),
                    )
                    external_schedule = external_repo.get_external_schedule(schedule_name)
                    tick_context.update_with_status(status=JobTickStatus.STARTED)
                    _launch_scheduled_execution(
                        instance,
                        repo_location,
                        external_repo,
                        external_schedule,
                        tick_context,
                        stream,
                    )

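
def _example_tick_scope_pattern():
    # Hedged sketch, not from the original source: why the tick scope above is
    # opened before any external artifact loads. A minimal stand-in in the
    # shape of _schedule_tick_state records a failure on the tick if loading
    # raises, so the error is persisted rather than lost. The dict-based tick
    # here is a stand-in for the real DB-backed JobTickData record.
    from contextlib import contextmanager

    @contextmanager
    def _tick_scope(tick):
        try:
            yield tick
        except Exception as exc:
            tick["status"] = "FAILURE"  # the real code writes JobTickStatus.FAILURE
            tick["error"] = str(exc)
            raise

    tick = {"status": "STARTED"}
    try:
        with _tick_scope(tick):
            raise RuntimeError("simulated repository load error")
    except RuntimeError:
        pass
    assert tick["status"] == "FAILURE"
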
def execute_cursor_command(sensor_name, cli_args, print_fn):
    with DagsterInstance.get() as instance:
        with get_repository_location_from_kwargs(
            instance, version=dagster_version, kwargs=cli_args
        ) as repo_location:
            if bool(cli_args.get("delete")) == bool(cli_args.get("set")):
                # must use exactly one of --set / --delete
                raise click.UsageError("Must set cursor using `--set <value>` or use `--delete`")

            cursor_value = cli_args.get("set")

            external_repo = get_external_repository_from_repo_location(
                repo_location, cli_args.get("repository")
            )
            check_repo_and_scheduler(external_repo, instance)
            external_sensor = external_repo.get_external_sensor(sensor_name)
            job_state = instance.get_instigator_state(
                external_sensor.get_external_origin_id(), external_sensor.selector_id
            )
            if not job_state:
                instance.add_instigator_state(
                    InstigatorState(
                        external_sensor.get_external_origin(),
                        InstigatorType.SENSOR,
                        InstigatorStatus.STOPPED,
                        SensorInstigatorData(
                            min_interval=external_sensor.min_interval_seconds,
                            cursor=cursor_value,
                        ),
                    )
                )
            else:
                instance.update_instigator_state(
                    job_state.with_data(
                        SensorInstigatorData(
                            last_tick_timestamp=job_state.instigator_data.last_tick_timestamp,
                            last_run_key=job_state.instigator_data.last_run_key,
                            min_interval=external_sensor.min_interval_seconds,
                            cursor=cursor_value,
                        ),
                    )
                )
            if cursor_value:
                print_fn(
                    f'Set cursor state for sensor {external_sensor.name} to "{cursor_value}"'
                )
            else:
                print_fn(f"Cleared cursor state for sensor {external_sensor.name}")

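
def _example_cursor_usage():
    # Hedged usage sketch, not from the original source: setting and clearing
    # a sensor cursor. The workspace keys in cli_args mirror the standard
    # dagster CLI target options and are assumptions here, as are the file,
    # repository, and sensor names.
    target = {"python_file": "repo.py", "attribute": "my_repo"}

    # Set the cursor to an explicit value.
    execute_cursor_command("my_sensor", {**target, "set": "2021-01-01"}, print)

    # Clear it again. Passing both (or neither) of set/delete raises UsageError.
    execute_cursor_command("my_sensor", {**target, "delete": True}, print)
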
def launch_scheduled_execution(output_file, schedule_name, **kwargs):
    with ipc_write_stream(output_file) as stream:
        instance = DagsterInstance.get()
        repository_origin = get_repository_origin_from_kwargs(kwargs)
        schedule_origin = repository_origin.get_schedule_origin(schedule_name)

        # open the tick scope before we load any external artifacts so that
        # load errors are stored in DB
        with _schedule_tick_state(
            instance,
            stream,
            ScheduleTickData(
                schedule_origin_id=schedule_origin.get_id(),
                schedule_name=schedule_name,
                timestamp=time.time(),
                cron_schedule=None,  # not yet loaded
                status=ScheduleTickStatus.STARTED,
            ),
        ) as tick:
            repo_location = get_repository_location_from_kwargs(kwargs, instance)
            repo_dict = repo_location.get_repositories()
            check.invariant(
                repo_dict and len(repo_dict) == 1,
                'Passed in arguments should reference exactly one repository, instead there are {num_repos}'.format(
                    num_repos=len(repo_dict)
                ),
            )
            external_repo = next(iter(repo_dict.values()))
            check.invariant(
                schedule_name
                in [schedule.name for schedule in external_repo.get_external_schedules()],
                'Could not find schedule named {schedule_name}'.format(
                    schedule_name=schedule_name
                ),
            )
            external_schedule = external_repo.get_external_schedule(schedule_name)
            tick.update_with_status(
                status=ScheduleTickStatus.STARTED,
                cron_schedule=external_schedule.cron_schedule,
            )
            _launch_scheduled_execution(
                instance, repo_location, external_repo, external_schedule, tick, stream
            )

def execute_launch_command(instance, kwargs):
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")
    check.inst_param(instance, "instance", DagsterInstance)
    config = get_config_from_args(kwargs)

    with get_repository_location_from_kwargs(kwargs) as repo_location:
        external_repo = get_external_repository_from_repo_location(
            repo_location, kwargs.get("repository")
        )
        external_pipeline = get_external_pipeline_from_external_repo(
            external_repo, kwargs.get("pipeline")
        )

        log_external_repo_stats(
            instance=instance,
            external_pipeline=external_pipeline,
            external_repo=external_repo,
            source="pipeline_launch_command",
        )

        if preset and config:
            raise click.UsageError("Can not use --preset with -c / --config / --config-json.")

        run_tags = get_tags_from_args(kwargs)

        solid_selection = get_solid_selection_from_args(kwargs)

        pipeline_run = _create_external_pipeline_run(
            instance=instance,
            repo_location=repo_location,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
            run_config=config,
            mode=mode,
            preset=preset,
            tags=run_tags,
            solid_selection=solid_selection,
            run_id=kwargs.get("run_id"),
        )

        return instance.submit_run(pipeline_run.run_id, external_pipeline)

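
def _example_launch_usage():
    # Hedged usage sketch, not from the original source: launching a pipeline
    # run through execute_launch_command above (the variant that routes the
    # run through instance.submit_run). The kwargs keys mirror the dagster CLI
    # options; the file, repository, and pipeline names are assumptions.
    with DagsterInstance.get() as instance:
        execute_launch_command(
            instance,
            {
                "python_file": "repo.py",
                "attribute": "my_repo",
                "pipeline": "my_pipeline",
                "mode": "default",
                "preset": None,
                "config": ("run_config.yaml",),
            },
        )
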
def execute_launch_command(instance, kwargs):
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")
    check.inst_param(instance, "instance", DagsterInstance)
    config = list(check.opt_tuple_param(kwargs.get("config"), "config", default=(), of_type=str))

    with get_repository_location_from_kwargs(kwargs, instance) as repo_location:
        external_repo = get_external_repository_from_repo_location(
            repo_location, kwargs.get("repository")
        )
        external_pipeline = get_external_pipeline_from_external_repo(
            external_repo,
            kwargs.get("pipeline"),
        )

        log_external_repo_stats(
            instance=instance,
            external_pipeline=external_pipeline,
            external_repo=external_repo,
            source="pipeline_launch_command",
        )

        if preset and config:
            raise click.UsageError("Can not use --preset with --config.")

        run_tags = get_tags_from_args(kwargs)

        solid_selection = get_solid_selection_from_args(kwargs)

        pipeline_run = _create_external_pipeline_run(
            instance=instance,
            repo_location=repo_location,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
            run_config=get_run_config_from_file_list(config),
            mode=mode,
            preset=preset,
            tags=run_tags,
            solid_selection=solid_selection,
        )

        return instance.launch_run(pipeline_run.run_id, external_pipeline)

def execute_backfill_command(cli_args, print_fn, instance):
    with get_repository_location_from_kwargs(cli_args) as repo_location:
        _execute_backfill_command_at_location(cli_args, print_fn, instance, repo_location)

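
def _example_backfill_usage():
    # Hedged usage sketch, not from the original source: invoking the thin
    # backfill wrapper above, which delegates all real work to
    # _execute_backfill_command_at_location. The cli_args keys are assumptions
    # based on the dagster CLI backfill options; names are illustrative only.
    with DagsterInstance.get() as instance:
        execute_backfill_command(
            cli_args={
                "python_file": "repo.py",
                "attribute": "my_repo",
                "pipeline": "my_pipeline",
                "partition_set": "my_partition_set",
                "noprompt": True,
            },
            print_fn=print,
            instance=instance,
        )
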
def launch_scheduled_execution(output_file, schedule_name, override_system_timezone, **kwargs):
    with (
        mock_system_timezone(override_system_timezone)
        if override_system_timezone
        else nullcontext()
    ):
        with ipc_write_stream(output_file) as stream:
            with DagsterInstance.get() as instance:
                repository_origin = get_repository_origin_from_kwargs(kwargs)
                job_origin = repository_origin.get_job_origin(schedule_name)

                # open the tick scope before we load any external artifacts so that
                # load errors are stored in DB
                with _schedule_tick_context(
                    instance,
                    stream,
                    JobTickData(
                        job_origin_id=job_origin.get_id(),
                        job_name=schedule_name,
                        job_type=JobType.SCHEDULE,
                        status=JobTickStatus.STARTED,
                        timestamp=time.time(),
                    ),
                ) as tick_context:
                    with get_repository_location_from_kwargs(kwargs) as repo_location:
                        repo_dict = repo_location.get_repositories()
                        check.invariant(
                            repo_dict and len(repo_dict) == 1,
                            "Passed in arguments should reference exactly one repository, instead there are {num_repos}".format(
                                num_repos=len(repo_dict)
                            ),
                        )
                        external_repo = next(iter(repo_dict.values()))
                        if schedule_name not in [
                            schedule.name
                            for schedule in external_repo.get_external_schedules()
                        ]:
                            raise DagsterInvariantViolationError(
                                "Could not find schedule named {schedule_name}".format(
                                    schedule_name=schedule_name
                                ),
                            )

                        external_schedule = external_repo.get_external_schedule(schedule_name)

                        # Validate that either the schedule has no timezone or it matches
                        # the system timezone
                        schedule_timezone = external_schedule.execution_timezone
                        if schedule_timezone:
                            system_timezone = pendulum.now().timezone.name

                            if system_timezone != external_schedule.execution_timezone:
                                raise DagsterInvariantViolationError(
                                    "Schedule {schedule_name} is set to execute in {schedule_timezone}, "
                                    "but this scheduler can only run in the system timezone, "
                                    "{system_timezone}. Use DagsterDaemonScheduler if you want to be able "
                                    "to execute schedules in arbitrary timezones.".format(
                                        schedule_name=external_schedule.name,
                                        schedule_timezone=schedule_timezone,
                                        system_timezone=system_timezone,
                                    ),
                                )

                        _launch_scheduled_executions(
                            instance, repo_location, external_repo, external_schedule, tick_context
                        )

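
def _example_timezone_check():
    # Hedged sketch, not from the original source: the timezone guard above in
    # isolation. pendulum.now().timezone.name gives the system timezone; a
    # schedule pinned to a different zone must be rejected by this scheduler.
    # ValueError stands in for DagsterInvariantViolationError here.
    import pendulum

    def _validate(schedule_timezone):
        system_timezone = pendulum.now().timezone.name
        if schedule_timezone and schedule_timezone != system_timezone:
            raise ValueError(
                f"Schedule runs in {schedule_timezone}, but the system timezone is {system_timezone}"
            )

    _validate(None)  # no pinned timezone: always allowed
    _validate(pendulum.now().timezone.name)  # matching timezone: allowed
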
def execute_backfill_command(cli_args, print_fn, instance=None):
    instance = instance or DagsterInstance.get()
    repo_location = get_repository_location_from_kwargs(cli_args, instance)
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get('repository')
    )

    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        cli_args.get('pipeline'),
    )

    noprompt = cli_args.get('noprompt')

    pipeline_partition_set_names = {
        external_partition_set.name: external_partition_set
        for external_partition_set in external_repo.get_external_partition_sets()
        if external_partition_set.pipeline_name == external_pipeline.name
    }

    if not pipeline_partition_set_names:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(external_pipeline.name)
        )
    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        if len(pipeline_partition_set_names) == 1:
            partition_set_name = next(iter(pipeline_partition_set_names.keys()))
        elif noprompt:
            raise click.UsageError('No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(
                    ', '.join(x for x in pipeline_partition_set_names.keys())
                )
            )

    partition_set = pipeline_partition_set_names.get(partition_set_name)

    if not partition_set:
        raise click.UsageError('No partition set found named `{}`'.format(partition_set_name))

    mode = partition_set.mode
    solid_selection = partition_set.solid_selection

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location_handle=repo_location.location_handle,
    )

    # Resolve partitions to backfill
    partition_names_or_error = repo_location.get_external_partition_names(
        repo_handle,
        partition_set_name,
    )

    if isinstance(partition_names_or_error, ExternalPartitionExecutionErrorData):
        raise DagsterBackfillFailedError(
            'Failure fetching partition names for {partition_set_name}: {error_message}'.format(
                partition_set_name=partition_set_name,
                error_message=partition_names_or_error.error.message,
            ),
            serialized_error_info=partition_names_or_error.error,
        )

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args
    )

    # Print backfill info
    print_fn('\n     Pipeline: {}'.format(external_pipeline.name))
    print_fn('Partition set: {}'.format(partition_set_name))
    print_fn('   Partitions: {}\n'.format(print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
        'Do you want to proceed with the backfill ({} partitions)?'.format(len(partition_names))
    ):
        print_fn('Launching runs... ')
        backfill_id = make_new_backfill_id()

        run_tags = merge_dicts(
            PipelineRun.tags_for_backfill_id(backfill_id),
            get_tags_from_args(cli_args),
        )

        for partition_name in partition_names:
            run_config_or_error = repo_location.get_external_partition_config(
                repo_handle, partition_set_name, partition_name
            )
            if isinstance(run_config_or_error, ExternalPartitionExecutionErrorData):
                raise DagsterBackfillFailedError(
                    'Failure fetching run config for partition {partition_name} in {partition_set_name}: {error_message}'.format(
                        partition_name=partition_name,
                        partition_set_name=partition_set_name,
                        error_message=run_config_or_error.error.message,
                    ),
                    serialized_error_info=run_config_or_error.error,
                )

            tags_or_error = repo_location.get_external_partition_tags(
                repo_handle, partition_set_name, partition_name
            )
            if isinstance(tags_or_error, ExternalPartitionExecutionErrorData):
                raise DagsterBackfillFailedError(
                    'Failure fetching tags for partition {partition_name} in {partition_set_name}: {error_message}'.format(
                        partition_name=partition_name,
                        partition_set_name=partition_set_name,
                        error_message=tags_or_error.error.message,
                    ),
                    serialized_error_info=tags_or_error.error,
                )

            run = _create_external_pipeline_run(
                instance=instance,
                repo_location=repo_location,
                external_repo=external_repo,
                external_pipeline=external_pipeline,
                run_config=run_config_or_error.run_config,
                mode=mode,
                preset=None,
                tags=merge_dicts(tags_or_error.tags, run_tags),
                solid_selection=frozenset(solid_selection) if solid_selection else None,
            )

            instance.launch_run(run.run_id, external_pipeline)
            # Remove once we can handle synchronous execution... currently limited by sqlite
            time.sleep(0.1)

        print_fn('Launched backfill job `{}`'.format(backfill_id))

    else:
        print_fn('Aborted!')

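
def _example_backfill_tag_precedence():
    # Hedged sketch, not from the original source: the tag-layering order used
    # above, with a stand-in merge_dicts where the second dict wins on key
    # conflicts. Backfill-id tags sit under user-supplied CLI tags, and
    # per-partition tags sit under the combined run_tags. The tag keys and
    # values shown are assumed shapes for illustration only.
    def merge_dicts(left, right):
        merged = dict(left)
        merged.update(right)
        return merged

    backfill_tags = {'dagster/backfill': 'abc12345'}  # assumed shape of tags_for_backfill_id
    cli_tags = {'team': 'data-eng'}
    run_tags = merge_dicts(backfill_tags, cli_tags)

    partition_tags = {'dagster/partition': '2020-01-01'}  # assumed shape of partition tags
    final_tags = merge_dicts(partition_tags, run_tags)
    assert final_tags['dagster/backfill'] == 'abc12345'
    assert final_tags['dagster/partition'] == '2020-01-01'
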