def execute_start_command(schedule_name, all_flag, cli_args, print_fn):
    handle = recon_repo_for_cli_args(cli_args)
    instance = DagsterInstance.get()
    check_handle_and_scheduler(handle, instance)

    repository = handle.get_definition()
    repository_name = repository.name

    if all_flag:
        for schedule in instance.all_schedules(repository_name):
            try:
                schedule = instance.start_schedule_and_update_storage_state(
                    repository_name, schedule.name
                )
            except DagsterInvariantViolationError as ex:
                raise click.UsageError(ex)

        print_fn(
            "Started all schedules for repository {repository_name}".format(
                repository_name=repository_name
            )
        )
    else:
        try:
            schedule = instance.start_schedule_and_update_storage_state(
                repository_name, schedule_name
            )
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)

        print_fn("Started schedule {schedule_name}".format(schedule_name=schedule_name))
def construct_environment_yaml(preset_name, env, pipeline_name, module_name):
    # Load environment dict from either a preset or yaml file globs
    if preset_name:
        if env:
            raise click.UsageError('Can not use --preset with --env.')

        cli_args = {
            'fn_name': pipeline_name,
            'pipeline_name': pipeline_name,
            'module_name': module_name,
        }
        pipeline = recon_repo_for_cli_args(cli_args).get_definition().get_pipeline(pipeline_name)
        environment_dict = pipeline.get_preset(preset_name).environment_dict
    else:
        env = list(env)
        environment_dict = load_yaml_from_glob_list(env) if env else {}

    # If not provided by the user, ensure we have storage location defined
    if 'storage' not in environment_dict:
        system_tmp_path = seven.get_system_temp_directory()
        dagster_tmp_path = os.path.join(system_tmp_path, 'dagster-airflow', pipeline_name)
        environment_dict['storage'] = {
            'filesystem': {'config': {'base_dir': six.ensure_str(dagster_tmp_path)}}
        }

    return environment_dict
def construct_environment_yaml(preset_name, config, pipeline_name, module_name):
    # Load environment dict from either a preset or yaml file globs
    if preset_name:
        if config:
            raise click.UsageError("Can not use --preset with --config.")

        cli_args = {
            "fn_name": pipeline_name,
            "pipeline_name": pipeline_name,
            "module_name": module_name,
        }
        pipeline = recon_repo_for_cli_args(cli_args).get_definition().get_pipeline(pipeline_name)
        run_config = pipeline.get_preset(preset_name).run_config
    else:
        config = list(config)
        run_config = load_yaml_from_glob_list(config) if config else {}

    # If not provided by the user, ensure we have storage location defined
    if "intermediate_storage" not in run_config:
        system_tmp_path = seven.get_system_temp_directory()
        dagster_tmp_path = os.path.join(system_tmp_path, "dagster-airflow", pipeline_name)
        run_config["intermediate_storage"] = {
            "filesystem": {"config": {"base_dir": six.ensure_str(dagster_tmp_path)}}
        }

    return run_config
def construct_environment_yaml(preset_name, config, pipeline_name, module_name):
    # Load environment dict from either a preset or yaml file globs
    cli_args = {
        "fn_name": pipeline_name,
        "module_name": module_name,
    }

    pipeline_def = recon_repo_for_cli_args(cli_args).get_definition().get_pipeline(pipeline_name)

    if preset_name:
        if config:
            raise click.UsageError("Can not use --preset with --config.")

        run_config = pipeline_def.get_preset(preset_name).run_config
    else:
        config = list(config)
        run_config = load_yaml_from_glob_list(config) if config else {}

    if (
        not can_isolate_steps(pipeline_def, pipeline_def.get_default_mode())
        and "intermediate_storage" not in run_config
    ):
        system_tmp_path = seven.get_system_temp_directory()
        dagster_tmp_path = os.path.join(system_tmp_path, "dagster-airflow", pipeline_name)
        run_config["intermediate_storage"] = {
            "filesystem": {"config": {"base_dir": dagster_tmp_path}}
        }

    return run_config
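# A hedged usage sketch for the variant above: build a run config from a preset.
# `my_preset`, `my_pipeline`, and `my_module` are hypothetical names, not taken
# from the source; the call assumes such a repository module is importable.
example_run_config = construct_environment_yaml(
    preset_name="my_preset",      # assumed preset defined on the pipeline
    config=(),                    # no --config globs when a preset is used
    pipeline_name="my_pipeline",  # assumed pipeline in the repository
    module_name="my_module",      # assumed module exposing the repository
)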
def execute_restart_command(schedule_name, all_running_flag, cli_args, print_fn):
    handle = recon_repo_for_cli_args(cli_args)
    instance = DagsterInstance.get()
    check_handle_and_scheduler(handle, instance)

    repository = handle.get_definition()

    if all_running_flag:
        for schedule in instance.all_schedules(repository):
            if schedule.status == ScheduleStatus.RUNNING:
                try:
                    instance.stop_schedule(repository, schedule.name)
                    instance.start_schedule(repository, schedule.name)
                except DagsterInvariantViolationError as ex:
                    raise click.UsageError(ex)

        print_fn(
            "Restarted all running schedules for repository {name}".format(name=repository.name)
        )
    else:
        schedule = instance.get_schedule_by_name(repository, schedule_name)
        if schedule.status != ScheduleStatus.RUNNING:
            raise click.UsageError(
                "Cannot restart schedule {name} because it is not currently running".format(
                    name=schedule.name
                )
            )

        try:
            instance.stop_schedule(repository, schedule_name)
            instance.start_schedule(repository, schedule_name)
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)

        print_fn("Restarted schedule {schedule_name}".format(schedule_name=schedule_name))
def execute_preview_command(cli_args, print_fn):
    handle = recon_repo_for_cli_args(cli_args)
    instance = DagsterInstance.get()
    check_handle_and_scheduler(handle, instance)

    repository = handle.get_definition()

    print_changes(repository, instance, print_fn, preview=True)
def host_dagit_ui(host, port, storage_fallback, reload_trigger=None, port_lookup=True, **kwargs):
    return host_dagit_ui_with_reconstructable_repo(
        recon_repo_for_cli_args(kwargs), host, port, storage_fallback, reload_trigger, port_lookup
    )
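# Hedged invocation sketch for host_dagit_ui above. The host, port, fallback
# directory, and repository target (module_name/fn_name) are all assumptions
# for illustration, not values from the source.
host_dagit_ui(
    host="127.0.0.1",
    port=3000,
    storage_fallback="/tmp/dagster-home",  # assumed writable directory
    module_name="my_module",               # assumed module containing the repository
    fn_name="define_repo",                 # assumed repository-defining function
)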
def execute_logs_command(schedule_name, cli_args, print_fn, instance=None):
    handle = recon_repo_for_cli_args(cli_args)
    instance = instance or DagsterInstance.get()
    check_handle_and_scheduler(handle, instance)

    repository = handle.get_definition()
    repository_name = repository.name

    logs_path = instance.logs_path_for_schedule(repository_name, schedule_name)
    print_fn(logs_path)
def execute_wipe_command(cli_args, print_fn):
    handle = recon_repo_for_cli_args(cli_args)
    instance = DagsterInstance.get()
    check_handle_and_scheduler(handle, instance)

    confirmation = click.prompt(
        'Are you sure you want to delete all schedules and schedule cron jobs? Type DELETE'
    )

    if confirmation == 'DELETE':
        instance.wipe_all_schedules()
        print_fn("Wiped all schedules and schedule cron jobs")
    else:
        click.echo('Exiting without deleting all schedules and schedule cron jobs')
def execute_stop_command(schedule_name, cli_args, print_fn, instance=None):
    handle = recon_repo_for_cli_args(cli_args)
    instance = instance or DagsterInstance.get()
    check_handle_and_scheduler(handle, instance)

    repository = handle.get_definition()

    try:
        instance.stop_schedule(repository, schedule_name)
    except DagsterInvariantViolationError as ex:
        raise click.UsageError(ex)

    print_fn("Stopped schedule {schedule_name}".format(schedule_name=schedule_name))
def execute_list_command(running_filter, stopped_filter, name_filter, cli_args, print_fn):
    handle = recon_repo_for_cli_args(cli_args)
    instance = DagsterInstance.get()
    check_handle_and_scheduler(handle, instance)

    repository = handle.get_definition()
    repository_name = repository.name

    if not name_filter:
        title = 'Repository {name}'.format(name=repository_name)
        print_fn(title)
        print_fn('*' * len(title))

    first = True

    if running_filter:
        schedules = [
            s
            for s in instance.all_schedules(repository_name)
            if s.status == ScheduleStatus.RUNNING
        ]
    elif stopped_filter:
        schedules = [
            s
            for s in instance.all_schedules(repository_name)
            if s.status == ScheduleStatus.STOPPED
        ]
    else:
        schedules = instance.all_schedules(repository_name)

    for schedule in schedules:
        schedule_def = repository.get_schedule_def(schedule.name)

        # If --name filter is present, only print the schedule name
        if name_filter:
            print_fn(schedule_def.name)
            continue

        flag = "[{status}]".format(status=schedule.status.value) if schedule else ""
        schedule_title = 'Schedule: {name} {flag}'.format(name=schedule_def.name, flag=flag)

        if not first:
            print_fn('*' * len(schedule_title))
        first = False

        print_fn(schedule_title)
        print_fn('Cron Schedule: {cron_schedule}'.format(cron_schedule=schedule_def.cron_schedule))
def load_dagit_for_repo_cli_args(n_pipelines=1, **kwargs):
    handle = recon_repo_for_cli_args(kwargs)

    app = create_app_with_reconstructable_repo(handle, DagsterInstance.ephemeral())

    client = app.test_client()

    res = client.get('/graphql?query={query_string}'.format(query_string=PIPELINES_OR_ERROR_QUERY))
    json_res = json.loads(res.data.decode('utf-8'))
    assert 'data' in json_res
    assert 'pipelinesOrError' in json_res['data']
    assert 'nodes' in json_res['data']['pipelinesOrError']
    assert len(json_res['data']['pipelinesOrError']['nodes']) == n_pipelines

    return res
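# Hedged test sketch: exercise the helper above against a hypothetical repository
# module that defines two pipelines. All names here are assumptions.
def test_load_dagit_for_example_repo():
    load_dagit_for_repo_cli_args(
        n_pipelines=2,
        module_name="my_test_module",  # assumed module with a repository
        fn_name="define_repo",         # assumed repository-defining function
    )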
def execute_up_command(preview, cli_args, print_fn):
    handle = recon_repo_for_cli_args(cli_args)
    instance = DagsterInstance.get()
    check_handle_and_scheduler(handle, instance)

    repository = handle.get_definition()
    python_path = sys.executable
    repository_path = handle.yaml_path

    print_changes(repository, instance, print_fn, preview=preview)
    if preview:
        return

    try:
        reconcile_scheduler_state(python_path, repository_path, repository, instance=instance)
    except DagsterInvariantViolationError as ex:
        raise click.UsageError(ex)
def ui(text, file, predefined, variables, remote, output, **kwargs):
    query = None
    if text is not None and file is None and predefined is None:
        query = text.strip('\'" \n\t')
    elif file is not None and text is None and predefined is None:
        query = file.read()
    elif predefined is not None and text is None and file is None:
        query = PREDEFINED_QUERIES[predefined]
    else:
        raise click.UsageError(
            'Must select one and only one of text (-t), file (-f), or predefined (-p) '
            'to select GraphQL document to execute.'
        )

    if remote:
        res = execute_query_against_remote(remote, query, variables)
        print(res)
    else:
        recon_repo = recon_repo_for_cli_args(kwargs)
        execute_query_from_cli(recon_repo, query, variables, output)
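# Hedged sketch of calling the ui callback above directly with an inline GraphQL
# document. The query text and repository target are assumptions for illustration;
# in practice click supplies these arguments from the -t/-f/-p options.
ui(
    text="{ pipelinesOrError { __typename } }",
    file=None,
    predefined=None,
    variables=None,
    remote=None,
    output=None,
    module_name="my_module",  # assumed repository module
    fn_name="define_repo",    # assumed repository-defining function
)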
def construct_environment_yaml(preset_name, config, pipeline_name, module_name):
    # Load environment dict from either a preset or yaml file globs
    cli_args = {
        "fn_name": pipeline_name,
        "module_name": module_name,
    }

    pipeline_def = recon_repo_for_cli_args(cli_args).get_definition().get_pipeline(pipeline_name)

    if preset_name:
        if config:
            raise click.UsageError("Can not use --preset with --config.")

        run_config = pipeline_def.get_preset(preset_name).run_config
    else:
        config = list(config)
        run_config = load_yaml_from_glob_list(config) if config else {}

    return run_config
def execute_list_command(cli_args, print_fn):
    repository = recon_repo_for_cli_args(cli_args).get_definition()

    title = 'Repository {name}'.format(name=repository.name)
    print_fn(title)
    print_fn('*' * len(title))
    first = True
    for pipeline in repository.get_all_pipelines():
        pipeline_title = 'Pipeline: {name}'.format(name=pipeline.name)

        if not first:
            print_fn('*' * len(pipeline_title))
        first = False

        print_fn(pipeline_title)
        if pipeline.description:
            print_fn('Description:')
            print_fn(format_description(pipeline.description, indent=' ' * 4))
        print_fn('Solids: (Execution Order)')
        for solid in pipeline.solids_in_topological_order:
            print_fn('    ' + solid.name)
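# Hedged usage sketch for the pipeline-listing command above: load a repository
# from hypothetical CLI args and print each pipeline with click.echo. The
# module_name/fn_name values are assumptions, not from the source.
execute_list_command(
    {"module_name": "my_module", "fn_name": "define_repo"},  # assumed repo args
    click.echo,
)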
def execute_backfill_command(cli_args, print_fn, instance=None):
    pipeline_name = cli_args.pop('pipeline_name')
    repo_args = {k: v for k, v in cli_args.items() if k in REPO_ARG_NAMES}
    if pipeline_name and not isinstance(pipeline_name, six.string_types):
        if len(pipeline_name) == 1:
            pipeline_name = pipeline_name[0]

    instance = instance or DagsterInstance.get()
    recon_repo = recon_repo_for_cli_args(repo_args)
    repo_def = recon_repo.get_definition()

    noprompt = cli_args.get('noprompt')

    # Resolve pipeline
    if not pipeline_name and noprompt:
        raise click.UsageError('No pipeline specified')
    if not pipeline_name:
        pipeline_name = click.prompt(
            'Select a pipeline to backfill: {}'.format(', '.join(repo_def.pipeline_names))
        )

    if not repo_def.has_pipeline(pipeline_name):
        raise click.UsageError('No pipeline found named `{}`'.format(pipeline_name))

    pipeline_def = repo_def.get_pipeline(pipeline_name)

    # Resolve partition set
    all_partition_sets = repo_def.partition_set_defs + [
        schedule_def.get_partition_set()
        for schedule_def in repo_def.schedule_defs
        if isinstance(schedule_def, PartitionScheduleDefinition)
    ]
    pipeline_partition_sets = [
        x for x in all_partition_sets if x.pipeline_name == pipeline_def.name
    ]
    if not pipeline_partition_sets:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(pipeline_def.name)
        )
    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        if len(pipeline_partition_sets) == 1:
            partition_set_name = pipeline_partition_sets[0].name
        elif noprompt:
            raise click.UsageError('No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(
                    ', '.join(x.name for x in pipeline_partition_sets)
                )
            )
    partition_set = next(
        (x for x in pipeline_partition_sets if x.name == partition_set_name), None
    )
    if not partition_set:
        raise click.UsageError('No partition set found named `{}`'.format(partition_set_name))

    # Resolve partitions to backfill
    partitions = gen_partitions_from_args(partition_set, cli_args)

    # Print backfill info
    print_fn('\n     Pipeline: {}'.format(pipeline_def.name))
    print_fn('Partition set: {}'.format(partition_set.name))
    print_fn('   Partitions: {}\n'.format(print_partition_format(partitions, indent_level=15)))

    # This whole CLI tool should move to more of a "host process" model - but this is how we start
    repo_location = InProcessRepositoryLocation(recon_repo)
    external_pipeline = repo_location.get_repository(repo_def.name).get_full_external_pipeline(
        pipeline_name
    )

    # Confirm and launch
    if noprompt or click.confirm(
        'Do you want to proceed with the backfill ({} partitions)?'.format(len(partitions))
    ):
        print_fn('Launching runs... ')
        backfill_id = make_new_backfill_id()

        run_tags = merge_dicts(
            PipelineRun.tags_for_backfill_id(backfill_id), get_tags_from_args(cli_args),
        )

        for partition in partitions:
            run = instance.create_run_for_pipeline(
                pipeline_def=pipeline_def,
                mode=partition_set.mode,
                solids_to_execute=frozenset(partition_set.solid_selection)
                if partition_set and partition_set.solid_selection
                else None,
                environment_dict=partition_set.environment_dict_for_partition(partition),
                tags=merge_dicts(partition_set.tags_for_partition(partition), run_tags),
            )
            instance.launch_run(run.run_id, external_pipeline)
            # Remove once we can handle synchronous execution... currently limited by sqlite
            time.sleep(0.1)

        print_fn('Launched backfill job `{}`'.format(backfill_id))
    else:
        print_fn(' Aborted!')
def repository_snapshot_command(output_file, **kwargs):
    recon_repo = recon_repo_for_cli_args(kwargs)
    definition = recon_repo.get_definition()

    ipc_write_unary_response(output_file, external_repository_data_from_def(definition))
def repository_snapshot_command(**kwargs):
    recon_repo = recon_repo_for_cli_args(kwargs)
    definition = recon_repo.get_definition()

    active_data = external_repository_data_from_def(definition)
    click.echo(serialize_dagster_namedtuple(active_data))
def execute_backfill_command(cli_args, print_fn, instance=None):
    pipeline_name = cli_args.pop('pipeline_name')
    repo_args = {k: v for k, v in cli_args.items() if k in REPO_ARG_NAMES}
    if pipeline_name and not isinstance(pipeline_name, six.string_types):
        if len(pipeline_name) == 1:
            pipeline_name = pipeline_name[0]

    instance = instance or DagsterInstance.get()
    handle = recon_repo_for_cli_args(repo_args)
    repository = handle.get_definition()
    noprompt = cli_args.get('noprompt')

    # Check run launcher
    if not instance.run_launcher:
        raise click.UsageError(
            'A run launcher must be configured before running a backfill. You can configure a run '
            'launcher (e.g. dagster_graphql.launcher.RemoteDagitRunLauncher) in your instance '
            '`dagster.yaml` settings. See '
            'https://docs.dagster.io/docs/deploying/instance/ for more information.'
        )

    # Resolve pipeline
    if not pipeline_name and noprompt:
        raise click.UsageError('No pipeline specified')
    if not pipeline_name:
        pipeline_name = click.prompt(
            'Select a pipeline to backfill: {}'.format(', '.join(repository.pipeline_names))
        )

    if not repository.has_pipeline(pipeline_name):
        raise click.UsageError('No pipeline found named `{}`'.format(pipeline_name))

    pipeline = repository.get_pipeline(pipeline_name)

    # Resolve partition set
    all_partition_sets = get_partition_sets_for_handle(handle)
    pipeline_partition_sets = [
        x for x in all_partition_sets if x.pipeline_name == pipeline.name
    ]
    if not pipeline_partition_sets:
        raise click.UsageError('No partition sets found for pipeline `{}`'.format(pipeline.name))
    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        if len(pipeline_partition_sets) == 1:
            partition_set_name = pipeline_partition_sets[0].name
        elif noprompt:
            raise click.UsageError('No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(
                    ', '.join(x.name for x in pipeline_partition_sets)
                )
            )
    partition_set = next(
        (x for x in pipeline_partition_sets if x.name == partition_set_name), None
    )
    if not partition_set:
        raise click.UsageError('No partition set found named `{}`'.format(partition_set_name))

    # Resolve partitions to backfill
    partitions = gen_partitions_from_args(partition_set, cli_args)

    # Print backfill info
    print_fn('\n     Pipeline: {}'.format(pipeline.name))
    print_fn('Partition set: {}'.format(partition_set.name))
    print_fn('   Partitions: {}\n'.format(print_partition_format(partitions, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
        'Do you want to proceed with the backfill ({} partitions)?'.format(len(partitions))
    ):
        print_fn('Launching runs... ')
        backfill_id = make_new_backfill_id()

        run_tags = merge_dicts(
            PipelineRun.tags_for_backfill_id(backfill_id), get_tags_from_args(cli_args),
        )

        for partition in partitions:
            run = instance.create_run_for_pipeline(
                pipeline_def=pipeline,
                mode=partition_set.mode,
                solid_subset=partition_set.solid_subset,
                environment_dict=partition_set.environment_dict_for_partition(partition),
                tags=merge_dicts(partition_set.tags_for_partition(partition), run_tags),
            )
            instance.launch_run(run.run_id)
            # Remove once we can handle synchronous execution... currently limited by sqlite
            time.sleep(0.1)

        print_fn('Launched backfill job `{}`'.format(backfill_id))
    else:
        print_fn(' Aborted!')
def execute_run_command(output_file, pipeline_run_id, instance_ref, **kwargs):
    recon_repo = recon_repo_for_cli_args(kwargs)
    return _execute_run_command_body(output_file, recon_repo, pipeline_run_id, instance_ref)