def execute_list_command(running_filter, stopped_filter, name_filter, cli_args, print_fn):
    """List the stored schedule states of the repository selected by ``cli_args``.

    Args:
        running_filter: When truthy, list only states whose status is
            ``JobStatus.RUNNING``.
        stopped_filter: When truthy (and ``running_filter`` is falsy), list only
            states whose status is ``JobStatus.STOPPED``.
        name_filter: When truthy, print just each schedule's job name; the
            repository header, status flag and cron line are skipped.
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable invoked once per output line.
    """
    with DagsterInstance.get() as instance:
        with get_external_repository_from_kwargs(cli_args) as external_repo:
            check_repo_and_scheduler(external_repo, instance)
            repo_name = external_repo.name

            if not name_filter:
                header = "Repository {name}".format(name=repo_name)
                print_fn(header)
                print_fn("*" * len(header))

            stored_states = instance.all_stored_job_state(
                external_repo.get_external_origin_id(), job_type=JobType.SCHEDULE
            )
            # running_filter takes precedence over stopped_filter.
            if running_filter:
                stored_states = [
                    state for state in stored_states if state.status == JobStatus.RUNNING
                ]
            elif stopped_filter:
                stored_states = [
                    state for state in stored_states if state.status == JobStatus.STOPPED
                ]

            is_first = True
            for state in stored_states:
                # If --name filter is present, only print the schedule name
                if name_filter:
                    print_fn(state.job_name)
                    continue

                status_flag = ""
                if state:
                    status_flag = "[{status}]".format(status=state.status.value)
                heading = "Schedule: {name} {flag}".format(
                    name=state.job_name, flag=status_flag
                )

                # A row of asterisks separates consecutive schedules.
                if is_first:
                    is_first = False
                else:
                    print_fn("*" * len(heading))

                print_fn(heading)
                print_fn(
                    "Cron Schedule: {cron_schedule}".format(
                        cron_schedule=state.job_specific_data.cron_schedule
                    )
                )
def execute_list_command(cli_args, print_fn, using_job_op_graph_apis=False):
    """Print every pipeline (or job) of the selected repository with its nodes.

    Args:
        cli_args: Passed as ``kwargs`` to ``get_external_repository_from_kwargs``.
        print_fn: Callable invoked once per output line.
        using_job_op_graph_apis: When truthy, jobs are listed and the labels
            read "Job"/"Ops"; otherwise pipelines are listed with
            "Pipeline"/"Solids" labels.
    """
    if using_job_op_graph_apis:
        service_name = "``dagster job list``"
    else:
        service_name = "``dagster pipeline list``"

    with get_instance_for_service(service_name) as instance:
        with get_external_repository_from_kwargs(
            instance, version=dagster_version, kwargs=cli_args
        ) as external_repository:
            header = "Repository {name}".format(name=external_repository.name)
            print_fn(header)
            print_fn("*" * len(header))

            if using_job_op_graph_apis:
                targets = external_repository.get_external_jobs()
            else:
                targets = external_repository.get_all_external_pipelines()

            is_first = True
            for target in targets:
                heading = "{pipeline_or_job}: {name}".format(
                    pipeline_or_job="Job" if using_job_op_graph_apis else "Pipeline",
                    name=target.name,
                )

                # A row of asterisks separates consecutive entries.
                if is_first:
                    is_first = False
                else:
                    print_fn("*" * len(heading))
                print_fn(heading)

                if target.description:
                    print_fn("Description:")
                    print_fn(format_description(target.description, indent=" " * 4))

                print_fn(
                    "{solid_or_op}: (Execution Order)".format(
                        solid_or_op="Ops" if using_job_op_graph_apis else "Solids"
                    )
                )
                for node_name in target.pipeline_snapshot.solid_names_in_topological_order:
                    print_fn(" " + node_name)
def execute_start_command(schedule_name, all_flag, cli_args, print_fn):
    """Start one schedule, or every schedule of the selected repository.

    Args:
        schedule_name: Name of the schedule to start; read only when
            ``all_flag`` is falsy.
        all_flag: When truthy, start every schedule returned by
            ``external_repo.get_external_schedules()``.
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable used to report the outcome.

    Raises:
        click.UsageError: When starting a schedule raises
            ``DagsterInvariantViolationError``.
    """
    instance = DagsterInstance.get()
    with get_external_repository_from_kwargs(cli_args, instance) as external_repo:
        check_repo_and_scheduler(external_repo, instance)
        repo_name = external_repo.name

        if not all_flag:
            try:
                target = external_repo.get_external_schedule(schedule_name)
                instance.start_schedule_and_update_storage_state(target)
            except DagsterInvariantViolationError as ex:
                raise click.UsageError(ex)

            print_fn("Started schedule {schedule_name}".format(schedule_name=schedule_name))
            return

        for schedule in external_repo.get_external_schedules():
            try:
                instance.start_schedule_and_update_storage_state(schedule)
            except DagsterInvariantViolationError as ex:
                raise click.UsageError(ex)

        print_fn(
            "Started all schedules for repository {repository_name}".format(
                repository_name=repo_name
            )
        )
def execute_preview_command(cli_args, print_fn):
    """Print the scheduler changes for the selected repository in preview mode.

    Args:
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable handed to ``print_changes`` for output.
    """
    with DagsterInstance.get() as instance, get_external_repository_from_kwargs(
        cli_args, instance
    ) as external_repo:
        check_repo_and_scheduler(external_repo, instance)
        # preview=True is always passed here.
        print_changes(external_repo, instance, print_fn, preview=True)
def execute_start_command(sensor_name, all_flag, cli_args, print_fn):
    """Start one sensor, or every sensor of the selected repository.

    Args:
        sensor_name: Name of the sensor to start; read only when ``all_flag``
            is falsy.
        all_flag: When truthy, start every sensor returned by
            ``external_repo.get_external_sensors()``.
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable used to report the outcome.

    Raises:
        click.UsageError: When ``DagsterInvariantViolationError`` is raised
            while looking up or starting a sensor.
    """
    with DagsterInstance.get() as instance:
        with get_external_repository_from_kwargs(cli_args) as external_repo:
            check_repo_and_scheduler(external_repo, instance)
            repo_name = external_repo.name

            if not all_flag:
                try:
                    sensor = external_repo.get_external_sensor(sensor_name)
                    instance.start_sensor(sensor)
                except DagsterInvariantViolationError as ex:
                    raise click.UsageError(ex)

                print_fn("Started sensor {sensor_name}".format(sensor_name=sensor_name))
                return

            try:
                for sensor in external_repo.get_external_sensors():
                    instance.start_sensor(sensor)
                # The summary line stays inside the try block, as in the original.
                print_fn(
                    "Started all sensors for repository {repository_name}".format(
                        repository_name=repo_name
                    )
                )
            except DagsterInvariantViolationError as ex:
                raise click.UsageError(ex)
def command(**kwargs):
    """Load the external repository described by ``kwargs`` and optionally check it.

    ``repo_assert_fn`` is not defined in this function; when it is truthy it is
    called with the loaded repository.
    """
    repo_context = get_external_repository_from_kwargs(
        DagsterInstance.get(),
        version="",
        kwargs=kwargs,
    )
    with repo_context as loaded_repo:
        if not repo_assert_fn:
            return
        repo_assert_fn(loaded_repo)
def execute_list_command(running_filter, stopped_filter, name_filter, cli_args, print_fn):
    """List the sensors of the selected repository together with their status.

    A sensor without a stored state is listed with the value returned by its
    ``get_default_job_state(instance)``.

    Args:
        running_filter: When truthy, list only sensors whose state is
            ``JobStatus.RUNNING``.
        stopped_filter: When truthy (and ``running_filter`` is falsy), list only
            sensors whose state is ``JobStatus.STOPPED``.
        name_filter: When truthy, print just each job name and skip the
            repository header and status flag.
        cli_args: Passed as ``kwargs`` to ``get_external_repository_from_kwargs``.
        print_fn: Callable invoked once per output line.
    """
    with DagsterInstance.get() as instance:
        with get_external_repository_from_kwargs(
            instance, version=dagster_version, kwargs=cli_args
        ) as external_repo:
            check_repo_and_scheduler(external_repo, instance)
            repo_name = external_repo.name

            if not name_filter:
                header = "Repository {name}".format(name=repo_name)
                print_fn(header)
                print_fn("*" * len(header))

            stored_by_origin_id = {
                stored.job_origin_id: stored
                for stored in instance.all_stored_job_state(
                    external_repo.get_external_origin_id(), JobType.SENSOR
                )
            }

            sensor_states = []
            for sensor in external_repo.get_external_sensors():
                origin_id = sensor.get_external_origin_id()
                # The default state is computed for every sensor, whether or not
                # a stored state exists.
                fallback_state = sensor.get_default_job_state(instance)
                sensor_states.append(stored_by_origin_id.get(origin_id, fallback_state))

            # running_filter takes precedence over stopped_filter.
            if running_filter:
                selected = [s for s in sensor_states if s.status == JobStatus.RUNNING]
            elif stopped_filter:
                selected = [s for s in sensor_states if s.status == JobStatus.STOPPED]
            else:
                selected = sensor_states

            is_first = True
            for state in selected:
                # If --name filter is present, only print the job name
                if name_filter:
                    print_fn(state.job_name)
                    continue

                status_flag = ""
                if state:
                    status_flag = "[{status}]".format(status=state.status.value)
                heading = "Sensor: {name} {flag}".format(name=state.job_name, flag=status_flag)

                # A row of asterisks separates consecutive sensors.
                if is_first:
                    is_first = False
                else:
                    print_fn("*" * len(heading))
                print_fn(heading)
def execute_logs_command(schedule_name, cli_args, print_fn, instance=None):
    """Print the log path that the instance reports for ``schedule_name``.

    Args:
        schedule_name: Name of the schedule to look up in the repository.
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable that receives the resulting path.
        instance: Instance to use. When ``None`` (the default) the instance
            returned by ``DagsterInstance.get()`` is used. Previously this
            argument was accepted but always overwritten.
    """
    if instance is None:
        instance = DagsterInstance.get()

    external_repo = get_external_repository_from_kwargs(cli_args, instance)
    check_repo_and_scheduler(external_repo, instance)

    schedule_origin_id = external_repo.get_external_schedule(schedule_name).get_origin_id()
    # os.path.join is given a single component, exactly as in the original.
    logs_path = os.path.join(instance.logs_path_for_schedule(schedule_origin_id))
    print_fn(logs_path)
def execute_restart_command(schedule_name, all_running_flag, cli_args, print_fn):
    """Restart (stop, then start) one schedule or every running schedule.

    Args:
        schedule_name: Name of the schedule to restart; read only when
            ``all_running_flag`` is falsy.
        all_running_flag: When truthy, restart every stored schedule state of
            the repository whose status is ``InstigatorStatus.RUNNING``.
        cli_args: Passed as ``kwargs`` to ``get_external_repository_from_kwargs``.
        print_fn: Callable used to report the outcome.

    Raises:
        click.UsageError: When the named schedule has a stored state that is
            not running, or when stopping/starting raises
            ``DagsterInvariantViolationError``.
    """
    with DagsterInstance.get() as instance:
        with get_external_repository_from_kwargs(
            instance, version=dagster_version, kwargs=cli_args
        ) as external_repo:
            check_repo_and_scheduler(external_repo, instance)
            repository_name = external_repo.name

            if all_running_flag:
                for schedule_state in instance.all_instigator_state(
                    external_repo.get_external_origin_id(),
                    external_repo.selector_id,
                    InstigatorType.SCHEDULE,
                ):
                    if schedule_state.status != InstigatorStatus.RUNNING:
                        continue
                    try:
                        external_schedule = external_repo.get_external_schedule(
                            schedule_state.instigator_name
                        )
                        instance.stop_schedule(
                            schedule_state.instigator_origin_id,
                            external_schedule.selector_id,
                            external_schedule,
                        )
                        instance.start_schedule(external_schedule)
                    except DagsterInvariantViolationError as ex:
                        raise click.UsageError(ex)

                print_fn(
                    "Restarted all running schedules for repository {name}".format(
                        name=repository_name
                    )
                )
            else:
                external_schedule = external_repo.get_external_schedule(schedule_name)
                schedule_origin_id = external_schedule.get_external_origin_id()
                schedule_state = instance.get_instigator_state(
                    schedule_origin_id,
                    external_schedule.selector_id,
                )

                if schedule_state is not None:
                    if schedule_state.status != InstigatorStatus.RUNNING:
                        # The error used to be constructed without being raised,
                        # so this guard had no effect.
                        raise click.UsageError(
                            "Cannot restart a schedule {name} because is not currently running".format(
                                name=schedule_state.instigator_name
                            )
                        )
                    schedule_origin_id = schedule_state.instigator_origin_id
                # With no stored state, the origin id used for the lookup is
                # passed to stop_schedule instead of dereferencing None.

                try:
                    instance.stop_schedule(
                        schedule_origin_id,
                        external_schedule.selector_id,
                        external_schedule,
                    )
                    instance.start_schedule(external_schedule)
                except DagsterInvariantViolationError as ex:
                    raise click.UsageError(ex)

                print_fn("Restarted schedule {schedule_name}".format(schedule_name=schedule_name))
def execute_stop_command(sensor_name, cli_args, print_fn, instance=None):
    """Stop the sensor called ``sensor_name`` in the selected repository.

    Args:
        sensor_name: Name of the sensor to stop.
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable used to report the outcome.
        instance: Accepted but not used; the name is rebound to the instance
            returned by ``DagsterInstance.get()``.

    Raises:
        click.UsageError: When looking up or stopping the sensor raises
            ``DagsterInvariantViolationError``.
    """
    with DagsterInstance.get() as instance, get_external_repository_from_kwargs(
        cli_args
    ) as external_repo:
        check_repo_and_scheduler(external_repo, instance)
        try:
            sensor = external_repo.get_external_sensor(sensor_name)
            sensor_origin_id = sensor.get_external_origin_id()
            instance.stop_sensor(sensor_origin_id)
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)

        print_fn("Stopped sensor {sensor_name}".format(sensor_name=sensor_name))
def execute_wipe_command(cli_args, print_fn):
    """Wipe all schedules after the user confirms by typing ``DELETE``.

    Args:
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable used to report whether anything was wiped.
    """
    with DagsterInstance.get() as instance, get_external_repository_from_kwargs(
        cli_args
    ) as external_repo:
        check_repo_and_scheduler(external_repo, instance)

        answer = click.prompt(
            "Are you sure you want to turn off all schedules and delete all schedule history? Type DELETE"
        )
        # Any answer other than the exact string "DELETE" aborts.
        if answer != "DELETE":
            print_fn("Exiting without turning off schedules or deleting schedule history")
            return

        instance.wipe_all_schedules()
        print_fn("Turned off all schedules and deleted all schedule history")
def execute_up_command(preview, cli_args, print_fn):
    """Print pending scheduler changes and, unless previewing, reconcile them.

    Args:
        preview: When truthy, only the changes are printed and
            ``reconcile_scheduler_state`` is not called.
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable handed to ``print_changes`` for output.

    Raises:
        click.UsageError: When reconciling raises
            ``DagsterInvariantViolationError``.
    """
    instance = DagsterInstance.get()
    with get_external_repository_from_kwargs(cli_args, instance) as external_repo:
        check_repo_and_scheduler(external_repo, instance)
        print_changes(external_repo, instance, print_fn, preview=preview)

        if not preview:
            try:
                instance.reconcile_scheduler_state(external_repo)
            except DagsterInvariantViolationError as ex:
                raise click.UsageError(ex)
def execute_stop_command(schedule_name, cli_args, print_fn, instance=None):
    """Stop the schedule called ``schedule_name`` in the selected repository.

    Args:
        schedule_name: Name of the schedule to stop.
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable used to report the outcome.
        instance: Instance to use. When ``None`` (the default) the instance
            returned by ``DagsterInstance.get()`` is used. Previously this
            argument was accepted but always overwritten.

    Raises:
        click.UsageError: When looking up or stopping the schedule raises
            ``DagsterInvariantViolationError``.
    """
    if instance is None:
        instance = DagsterInstance.get()

    with get_external_repository_from_kwargs(cli_args, instance) as external_repo:
        check_repo_and_scheduler(external_repo, instance)
        try:
            instance.stop_schedule_and_update_storage_state(
                external_repo.get_external_schedule(schedule_name).get_origin_id()
            )
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)

        print_fn("Stopped schedule {schedule_name}".format(schedule_name=schedule_name))
def execute_wipe_command(cli_args, print_fn):
    """Wipe all schedules after the user confirms by typing ``DELETE``.

    Both outcomes are reported through ``print_fn``. Previously the abort
    message went to ``click.echo`` and bypassed the injected callable.

    Args:
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable used to report whether anything was wiped.
    """
    instance = DagsterInstance.get()
    with get_external_repository_from_kwargs(cli_args, instance) as external_repo:
        check_repo_and_scheduler(external_repo, instance)

        confirmation = click.prompt(
            "Are you sure you want to delete all schedules and schedule cron jobs? Type DELETE"
        )
        # Any answer other than the exact string "DELETE" aborts.
        if confirmation == "DELETE":
            instance.wipe_all_schedules()
            print_fn("Wiped all schedules and schedule cron jobs")
        else:
            print_fn("Exiting without deleting all schedules and schedule cron jobs")
def execute_list_command(running_filter, stopped_filter, name_filter, cli_args, print_fn):
    """List the schedules of the selected repository with their running state.

    Each schedule's state comes from ``get_current_instigator_state``, which is
    given the stored state for that schedule (or ``None`` when none is stored).

    Args:
        running_filter: When truthy, skip schedules whose state is not running.
        stopped_filter: When truthy, skip schedules whose state is running.
        name_filter: When truthy, print just each schedule name and skip the
            repository header, status and cron line.
        cli_args: Passed as ``kwargs`` to ``get_external_repository_from_kwargs``.
        print_fn: Callable invoked once per output line.
    """
    with DagsterInstance.get() as instance:
        with get_external_repository_from_kwargs(
            instance, version=dagster_version, kwargs=cli_args
        ) as external_repo:
            check_repo_and_scheduler(external_repo, instance)
            repo_name = external_repo.name

            if not name_filter:
                header = "Repository {name}".format(name=repo_name)
                print_fn(header)
                print_fn("*" * len(header))

            schedules = external_repo.get_external_schedules()

            stored_by_origin_id = {}
            for stored in instance.all_instigator_state(
                external_repo.get_external_origin_id(),
                external_repo.selector_id,
                instigator_type=InstigatorType.SCHEDULE,
            ):
                stored_by_origin_id[stored.instigator_origin_id] = stored

            is_first = True
            for schedule in schedules:
                stored_state = stored_by_origin_id.get(schedule.get_external_origin_id())
                current_state = schedule.get_current_instigator_state(stored_state)

                if running_filter and not current_state.is_running:
                    continue
                if stopped_filter and current_state.is_running:
                    continue

                if name_filter:
                    print_fn(schedule.name)
                    continue

                if current_state.is_running:
                    status = "RUNNING"
                else:
                    status = "STOPPED"
                heading = f"Schedule: {schedule.name} [{status}]"

                # A row of asterisks separates consecutive schedules.
                if is_first:
                    is_first = False
                else:
                    print_fn("*" * len(heading))

                print_fn(heading)
                print_fn(f"Cron Schedule: {schedule.cron_schedule}")
def execute_logs_command(schedule_name, cli_args, print_fn, instance=None):
    """Print where the scheduler and execution logs of a schedule can be found.

    When the instance's scheduler is a ``DagsterDaemonScheduler`` only a
    deprecation notice is printed, and the value returned by ``print_fn`` is
    returned.

    Args:
        schedule_name: Name of the schedule to look up in the repository.
        cli_args: Passed as ``kwargs`` to ``get_external_repository_from_kwargs``.
        print_fn: Callable that receives the assembled text in a single call.
        instance: Accepted but not used; the name is rebound to the instance
            returned by ``DagsterInstance.get()``.
    """
    with DagsterInstance.get() as instance:
        with get_external_repository_from_kwargs(
            instance, version=dagster_version, kwargs=cli_args
        ) as external_repo:
            check_repo_and_scheduler(external_repo, instance)

            if isinstance(instance.scheduler, DagsterDaemonScheduler):
                return print_fn(
                    "This command is deprecated for the DagsterDaemonScheduler. "
                    "Logs for the DagsterDaemonScheduler written to the process output. "
                    "For help troubleshooting the Daemon Scheduler, see "
                    "https://docs.dagster.io/troubleshooting/schedules"
                )

            schedule_origin_id = external_repo.get_external_schedule(
                schedule_name
            ).get_external_origin_id()
            logs_path = os.path.join(instance.logs_path_for_schedule(schedule_origin_id))
            logs_directory = os.path.dirname(logs_path)

            # Among the *.result files next to the log file, pick the one with
            # the greatest os.path.getctime value.
            result_files = glob.glob("{}/*.result".format(logs_directory))
            if result_files:
                most_recent_log = max(result_files, key=os.path.getctime)
            else:
                most_recent_log = None

            scheduler_title = "Scheduler Logs:"
            scheduler_section = "{title}\n{sep}\n{info}\n".format(
                title=scheduler_title,
                sep="=" * len(scheduler_title),
                info=logs_path,
            )

            execution_title = (
                "Schedule Execution Logs:"
                "\nEvent logs from schedule execution. "
                "Errors that caused schedule executions to not run or fail can be found here. "
            )
            if most_recent_log:
                most_recent_info = "\nMost recent execution log: {}".format(most_recent_log)
            else:
                most_recent_info = ""
            execution_info = "All execution logs: {}{}".format(logs_directory, most_recent_info)
            execution_section = "\n{title}\n{sep}\n{info}\n".format(
                title=execution_title,
                sep="=" * len(execution_title),
                info=execution_info,
            )

            print_fn(scheduler_section + execution_section)
def execute_list_command(running_filter, stopped_filter, name_filter, cli_args, print_fn):
    """List the schedules of the selected repository with their stored status.

    A schedule without a stored state is treated as ``InstigatorStatus.STOPPED``.

    Args:
        running_filter: When truthy, skip schedules that have no stored state
            or whose stored status is ``InstigatorStatus.STOPPED``.
        stopped_filter: When truthy, skip schedules whose stored status is
            ``InstigatorStatus.RUNNING``.
        name_filter: When truthy, print just each schedule name and skip the
            repository header, status and cron line.
        cli_args: Passed as ``kwargs`` to ``get_external_repository_from_kwargs``.
        print_fn: Callable invoked once per output line.
    """
    with DagsterInstance.get() as instance:
        with get_external_repository_from_kwargs(
            instance, version=dagster_version, kwargs=cli_args
        ) as external_repo:
            check_repo_and_scheduler(external_repo, instance)
            repository_name = external_repo.name

            if not name_filter:
                title = "Repository {name}".format(name=repository_name)
                print_fn(title)
                print_fn("*" * len(title))

            repo_schedules = external_repo.get_external_schedules()
            stored_schedules_by_origin_id = {
                stored_schedule_state.job_origin_id: stored_schedule_state
                for stored_schedule_state in instance.all_stored_job_state(
                    external_repo.get_external_origin_id(), job_type=InstigatorType.SCHEDULE
                )
            }

            first = True
            for external_schedule in repo_schedules:
                stored_schedule_state = stored_schedules_by_origin_id.get(
                    external_schedule.get_external_origin_id()
                )

                if running_filter and (
                    not stored_schedule_state
                    or stored_schedule_state.status == InstigatorStatus.STOPPED
                ):
                    continue

                # The status must be compared here. The condition used to end in
                # a bare enum member, which is always truthy, so --stopped hid
                # every schedule that had any stored state.
                if (
                    stopped_filter
                    and stored_schedule_state
                    and stored_schedule_state.status == InstigatorStatus.RUNNING
                ):
                    continue

                if name_filter:
                    print_fn(external_schedule.name)
                    continue

                status = (
                    stored_schedule_state.status
                    if stored_schedule_state
                    else InstigatorStatus.STOPPED
                )
                schedule_title = f"Schedule: {external_schedule.name} [{status.value}]"

                # A row of asterisks separates consecutive schedules.
                if not first:
                    print_fn("*" * len(schedule_title))
                first = False

                print_fn(schedule_title)
                print_fn(f"Cron Schedule: {external_schedule.cron_schedule}")
def execute_list_command(running_filter, stopped_filter, name_filter, cli_args, print_fn):
    """List the stored schedule states of the repository selected by ``cli_args``.

    Args:
        running_filter: When truthy, list only states whose status is
            ``ScheduleStatus.RUNNING``.
        stopped_filter: When truthy (and ``running_filter`` is falsy), list only
            states whose status is ``ScheduleStatus.STOPPED``.
        name_filter: When truthy, print just each schedule name and skip the
            repository header, status flag and cron line.
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable invoked once per output line.
    """
    instance = DagsterInstance.get()
    external_repo = get_external_repository_from_kwargs(cli_args, instance)
    check_repo_and_scheduler(external_repo, instance)
    repo_name = external_repo.name

    if not name_filter:
        header = 'Repository {name}'.format(name=repo_name)
        print_fn(header)
        print_fn('*' * len(header))

    stored_states = instance.all_stored_schedule_state(external_repo.get_origin_id())
    # running_filter takes precedence over stopped_filter.
    if running_filter:
        stored_states = [
            state for state in stored_states if state.status == ScheduleStatus.RUNNING
        ]
    elif stopped_filter:
        stored_states = [
            state for state in stored_states if state.status == ScheduleStatus.STOPPED
        ]

    is_first = True
    for state in stored_states:
        # If --name filter is present, only print the schedule name
        if name_filter:
            print_fn(state.name)
            continue

        status_flag = ""
        if state:
            status_flag = "[{status}]".format(status=state.status.value)
        heading = 'Schedule: {name} {flag}'.format(name=state.name, flag=status_flag)

        # A row of asterisks separates consecutive schedules.
        if is_first:
            is_first = False
        else:
            print_fn('*' * len(heading))

        print_fn(heading)
        print_fn('Cron Schedule: {cron_schedule}'.format(cron_schedule=state.cron_schedule))
def execute_restart_command(schedule_name, all_running_flag, cli_args, print_fn):
    """Restart (stop, then start) one schedule or every running schedule.

    Args:
        schedule_name: Name of the schedule to restart; read only when
            ``all_running_flag`` is falsy.
        all_running_flag: When truthy, restart every stored schedule state of
            the repository whose status is ``ScheduleStatus.RUNNING``.
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable used to report the outcome.

    Raises:
        click.UsageError: When the named schedule is not running, or when
            stopping/starting raises ``DagsterInvariantViolationError``.
    """
    with DagsterInstance.get() as instance:
        with get_external_repository_from_kwargs(cli_args) as external_repo:
            check_repo_and_scheduler(external_repo, instance)
            repository_name = external_repo.name

            if all_running_flag:
                for schedule_state in instance.all_stored_schedule_state(
                    external_repo.get_origin_id()
                ):
                    if schedule_state.status != ScheduleStatus.RUNNING:
                        continue
                    try:
                        external_schedule = external_repo.get_external_schedule(
                            schedule_state.name
                        )
                        instance.stop_schedule_and_update_storage_state(
                            schedule_state.schedule_origin_id
                        )
                        instance.start_schedule_and_update_storage_state(external_schedule)
                    except DagsterInvariantViolationError as ex:
                        raise click.UsageError(ex)

                print_fn(
                    "Restarted all running schedules for repository {name}".format(
                        name=repository_name
                    )
                )
            else:
                external_schedule = external_repo.get_external_schedule(schedule_name)
                schedule_state = instance.get_schedule_state(external_schedule.get_origin_id())

                if schedule_state.status != ScheduleStatus.RUNNING:
                    # The error used to be constructed without being raised, so
                    # this guard had no effect.
                    raise click.UsageError(
                        "Cannot restart a schedule {name} because is not currently running".format(
                            name=schedule_state.name
                        )
                    )

                try:
                    instance.stop_schedule_and_update_storage_state(
                        schedule_state.schedule_origin_id
                    )
                    instance.start_schedule_and_update_storage_state(external_schedule)
                except DagsterInvariantViolationError as ex:
                    raise click.UsageError(ex)

                print_fn("Restarted schedule {schedule_name}".format(schedule_name=schedule_name))
def execute_stop_command(schedule_name, cli_args, print_fn, instance=None):
    """Stop the schedule called ``schedule_name`` in the selected repository.

    Args:
        schedule_name: Name of the schedule to stop.
        cli_args: Passed as ``kwargs`` to ``get_external_repository_from_kwargs``.
        print_fn: Callable used to report the outcome.
        instance: Accepted but not used; the name is rebound to the instance
            returned by ``DagsterInstance.get()``.

    Raises:
        click.UsageError: When looking up or stopping the schedule raises
            ``DagsterInvariantViolationError``.
    """
    with DagsterInstance.get() as instance, get_external_repository_from_kwargs(
        instance, version=dagster_version, kwargs=cli_args
    ) as external_repo:
        check_repo_and_scheduler(external_repo, instance)
        try:
            schedule = external_repo.get_external_schedule(schedule_name)
            schedule_origin_id = schedule.get_external_origin_id()
            instance.stop_schedule(schedule_origin_id, schedule)
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)

        print_fn("Stopped schedule {schedule_name}".format(schedule_name=schedule_name))
def execute_list_command(cli_args, print_fn, instance):
    """Print every pipeline of the selected repository with its solids.

    Args:
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable invoked once per output line.
        instance: Checked with ``check.inst_param`` against ``DagsterInstance``.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    with get_external_repository_from_kwargs(cli_args, instance) as external_repository:
        header = "Repository {name}".format(name=external_repository.name)
        print_fn(header)
        print_fn("*" * len(header))

        is_first = True
        for external_pipeline in external_repository.get_all_external_pipelines():
            heading = "Pipeline: {name}".format(name=external_pipeline.name)

            # A row of asterisks separates consecutive pipelines.
            if is_first:
                is_first = False
            else:
                print_fn("*" * len(heading))
            print_fn(heading)

            if external_pipeline.description:
                print_fn("Description:")
                print_fn(format_description(external_pipeline.description, indent=" " * 4))

            print_fn("Solids: (Execution Order)")
            ordered_names = external_pipeline.pipeline_snapshot.solid_names_in_topological_order
            for name in ordered_names:
                print_fn(" " + name)
def execute_list_command(cli_args, print_fn, instance):
    """Print every pipeline of the selected repository with its solids.

    Args:
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable invoked once per output line.
        instance: Checked with ``check.inst_param`` against ``DagsterInstance``.
    """
    check.inst_param(instance, 'instance', DagsterInstance)

    external_repository = get_external_repository_from_kwargs(cli_args, instance)
    header = 'Repository {name}'.format(name=external_repository.name)
    print_fn(header)
    print_fn('*' * len(header))

    is_first = True
    for external_pipeline in external_repository.get_all_external_pipelines():
        heading = 'Pipeline: {name}'.format(name=external_pipeline.name)

        # A row of asterisks separates consecutive pipelines.
        if is_first:
            is_first = False
        else:
            print_fn('*' * len(heading))
        print_fn(heading)

        if external_pipeline.description:
            print_fn('Description:')
            print_fn(format_description(external_pipeline.description, indent=' ' * 4))

        print_fn('Solids: (Execution Order)')
        ordered_names = external_pipeline.pipeline_snapshot.solid_names_in_topological_order
        for name in ordered_names:
            print_fn(' ' + name)
def execute_list_command(cli_args, print_fn):
    """Print every pipeline of the selected repository with its solids.

    The repository definition is obtained from the external repository's
    handle via ``repository_def_from_repository_handle``.

    Args:
        cli_args: Passed to ``get_external_repository_from_kwargs``.
        print_fn: Callable invoked once per output line.
    """
    external_repository = get_external_repository_from_kwargs(cli_args)
    # We should move this to use external repository
    # https://github.com/dagster-io/dagster/issues/2556
    repository = repository_def_from_repository_handle(external_repository.handle)

    header = 'Repository {name}'.format(name=repository.name)
    print_fn(header)
    print_fn('*' * len(header))

    is_first = True
    for pipeline_def in repository.get_all_pipelines():
        heading = 'Pipeline: {name}'.format(name=pipeline_def.name)

        # A row of asterisks separates consecutive pipelines.
        if is_first:
            is_first = False
        else:
            print_fn('*' * len(heading))
        print_fn(heading)

        if pipeline_def.description:
            print_fn('Description:')
            print_fn(format_description(pipeline_def.description, indent=' ' * 4))

        print_fn('Solids: (Execution Order)')
        for ordered_solid in pipeline_def.solids_in_topological_order:
            print_fn(' ' + ordered_solid.name)
def command(**kwargs):
    """Load the external repository described by ``kwargs`` and record it.

    The repository is stored under the ``"external_repo"`` key of
    ``capture_result``, a name that is not defined in this function.
    """
    repo_context = get_external_repository_from_kwargs(kwargs, DagsterInstance.ephemeral())
    with repo_context as loaded_repo:
        capture_result["external_repo"] = loaded_repo
def command(**kwargs):
    """Load the external repository described by ``kwargs`` and record it.

    The repository is stored under the ``'external_repo'`` key of
    ``capture_result``, a name that is not defined in this function.
    """
    loaded_repo = get_external_repository_from_kwargs(kwargs)
    capture_result['external_repo'] = loaded_repo
def command(**kwargs):
    """Load the external repository described by ``kwargs`` and optionally check it.

    ``repo_assert_fn`` is not defined in this function; when it is truthy it is
    called with the loaded repository.
    """
    with get_external_repository_from_kwargs(kwargs) as loaded_repo:
        if not repo_assert_fn:
            return
        repo_assert_fn(loaded_repo)
def command(**kwargs):
    """Load the external repository described by ``kwargs`` and record it.

    The repository is stored under the ``"external_repo"`` key of
    ``capture_result``, a name that is not defined in this function.
    """
    repo_context = get_external_repository_from_kwargs(kwargs)
    with repo_context as loaded_repo:
        capture_result["external_repo"] = loaded_repo
def command(**kwargs):
    """Load the external repository described by ``kwargs`` and record it.

    The repository is stored under the ``'external_repo'`` key of
    ``capture_result``, a name that is not defined in this function.
    """
    ephemeral_instance = DagsterInstance.ephemeral()
    loaded_repo = get_external_repository_from_kwargs(kwargs, ephemeral_instance)
    capture_result['external_repo'] = loaded_repo
def execute_backfill_command(cli_args, print_fn, instance=None):
    """Launch one run per selected partition of a pipeline's partition set.

    Args:
        cli_args: Mapping of CLI options. The keys ``'noprompt'`` and
            ``'partition_set'`` are read here; the mapping is also passed to
            the pipeline/repository loaders, ``gen_partitions_from_args`` and
            ``get_tags_from_args``.
        print_fn: Callable used for progress and summary output.
        instance: Instance to use; when falsy, ``DagsterInstance.get()`` is used.

    Raises:
        click.UsageError: When the pipeline has no partition sets, when no
            partition set is specified with several candidates and
            ``noprompt`` set, or when the named partition set is not found.
    """
    instance = instance or DagsterInstance.get()
    external_pipeline = get_external_pipeline_from_kwargs(cli_args, instance)
    external_repository = get_external_repository_from_kwargs(cli_args, instance)

    # We should move this to use external repository
    # https://github.com/dagster-io/dagster/issues/2556
    recon_repo = recon_repo_from_external_repo(external_repository)
    repo_def = recon_repo.get_definition()

    noprompt = cli_args.get('noprompt')
    pipeline_def = repo_def.get_pipeline(external_pipeline.name)

    # Resolve partition set: candidates are the repository's partition sets
    # plus those of its partition-based schedules, limited to this pipeline.
    declared_sets = repo_def.partition_set_defs
    schedule_sets = [
        schedule_def.get_partition_set()
        for schedule_def in repo_def.schedule_defs
        if isinstance(schedule_def, PartitionScheduleDefinition)
    ]
    candidates = [
        candidate
        for candidate in declared_sets + schedule_sets
        if candidate.pipeline_name == pipeline_def.name
    ]
    if not candidates:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(pipeline_def.name)
        )

    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        if len(candidates) == 1:
            # A single candidate needs neither an option nor a prompt.
            partition_set_name = candidates[0].name
        elif noprompt:
            raise click.UsageError('No partition set specified (see option `--partition-set`)')
        else:
            candidate_names = ', '.join(candidate.name for candidate in candidates)
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(candidate_names)
            )

    partition_set = None
    for candidate in candidates:
        if candidate.name == partition_set_name:
            partition_set = candidate
            break
    if not partition_set:
        raise click.UsageError('No partition set found named `{}`'.format(partition_set_name))

    # Resolve partitions to backfill
    partitions = gen_partitions_from_args(partition_set, cli_args)

    # Print backfill info
    print_fn('\n Pipeline: {}'.format(pipeline_def.name))
    print_fn('Partition set: {}'.format(partition_set.name))
    print_fn(' Partitions: {}\n'.format(print_partition_format(partitions, indent_level=15)))

    # Confirm and launch
    confirmed = noprompt or click.confirm(
        'Do you want to proceed with the backfill ({} partitions)?'.format(len(partitions))
    )
    if not confirmed:
        print_fn(' Aborted!')
        return

    print_fn('Launching runs... ')
    backfill_id = make_new_backfill_id()
    run_tags = merge_dicts(
        PipelineRun.tags_for_backfill_id(backfill_id),
        get_tags_from_args(cli_args),
    )

    for partition in partitions:
        if partition_set and partition_set.solid_selection:
            solids_to_execute = frozenset(partition_set.solid_selection)
        else:
            solids_to_execute = None

        run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            mode=partition_set.mode,
            solids_to_execute=solids_to_execute,
            run_config=partition_set.run_config_for_partition(partition),
            tags=merge_dicts(partition_set.tags_for_partition(partition), run_tags),
        )
        instance.launch_run(run.run_id, external_pipeline)
        # Remove once we can handle synchronous execution... currently limited by sqlite
        time.sleep(0.1)

    print_fn('Launched backfill job `{}`'.format(backfill_id))