Example #1
0
def execute_start_command(schedule_name, all_flag, cli_args, print_fn):
    """Start a single schedule, or every schedule of the repository.

    Args:
        schedule_name: Name of the schedule to start; only used when
            ``all_flag`` is falsy.
        all_flag: When truthy, start every schedule the instance reports for
            the repository instead of just ``schedule_name``.
        cli_args: CLI arguments handed to ``recon_repo_for_cli_args`` to
            locate the repository.
        print_fn: Callable that receives the confirmation message.

    Raises:
        click.UsageError: If starting a schedule raises
            ``DagsterInvariantViolationError``.
    """
    recon_repo = recon_repo_for_cli_args(cli_args)
    instance = DagsterInstance.get()
    check_handle_and_scheduler(recon_repo, instance)

    repo_name = recon_repo.get_definition().name

    def _start(name):
        # Surface invariant violations as CLI usage errors.
        try:
            instance.start_schedule_and_update_storage_state(repo_name, name)
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)

    if not all_flag:
        _start(schedule_name)
        print_fn("Started schedule {schedule_name}".format(
            schedule_name=schedule_name))
        return

    for stored_schedule in instance.all_schedules(repo_name):
        _start(stored_schedule.name)

    print_fn(
        "Started all schedules for repository {repository_name}".format(
            repository_name=repo_name))
Example #2
0
def construct_environment_yaml(preset_name, env, pipeline_name, module_name):
    """Build the environment dict for a pipeline from a preset or YAML globs.

    Args:
        preset_name: Name of a pipeline preset; mutually exclusive with ``env``.
        env: Iterable of YAML file globs, used when no preset is given.
        pipeline_name: Pipeline name; also used as the ``fn_name`` when
            locating the repository and as the last temp-directory component.
        module_name: Module name passed along when locating the repository.

    Returns:
        The environment dict. When it has no ``'storage'`` key, a filesystem
        storage entry under the system temp directory is added.

    Raises:
        click.UsageError: If both ``preset_name`` and ``env`` are provided.
    """
    if not preset_name:
        env_globs = list(env)
        environment_dict = load_yaml_from_glob_list(env_globs) if env_globs else {}
    else:
        if env:
            raise click.UsageError('Can not use --preset with --env.')

        recon_repo = recon_repo_for_cli_args({
            'fn_name': pipeline_name,
            'pipeline_name': pipeline_name,
            'module_name': module_name,
        })
        pipeline = recon_repo.get_definition().get_pipeline(pipeline_name)
        environment_dict = pipeline.get_preset(preset_name).environment_dict

    # A user-supplied storage section always wins.
    if 'storage' in environment_dict:
        return environment_dict

    base_dir = os.path.join(seven.get_system_temp_directory(),
                            'dagster-airflow', pipeline_name)
    environment_dict['storage'] = {
        'filesystem': {'config': {'base_dir': six.ensure_str(base_dir)}}
    }
    return environment_dict
Example #3
0
def construct_environment_yaml(preset_name, config, pipeline_name,
                               module_name):
    """Build the run config for a pipeline from a preset or YAML globs.

    Args:
        preset_name: Name of a pipeline preset; mutually exclusive with
            ``config``.
        config: Iterable of YAML file globs, used when no preset is given.
        pipeline_name: Pipeline name; also used as the ``fn_name`` when
            locating the repository and as the last temp-directory component.
        module_name: Module name passed along when locating the repository.

    Returns:
        The run config dict. When it has no ``"intermediate_storage"`` key, a
        filesystem entry under the system temp directory is added.

    Raises:
        click.UsageError: If both ``preset_name`` and ``config`` are provided.
    """
    if not preset_name:
        yaml_globs = list(config)
        run_config = load_yaml_from_glob_list(yaml_globs) if yaml_globs else {}
    else:
        if config:
            raise click.UsageError("Can not use --preset with --config.")

        recon_repo = recon_repo_for_cli_args({
            "fn_name": pipeline_name,
            "pipeline_name": pipeline_name,
            "module_name": module_name,
        })
        pipeline = recon_repo.get_definition().get_pipeline(pipeline_name)
        run_config = pipeline.get_preset(preset_name).run_config

    # Only fill in a storage location when the user did not provide one.
    if "intermediate_storage" in run_config:
        return run_config

    tmp_root = seven.get_system_temp_directory()
    base_dir = os.path.join(tmp_root, "dagster-airflow", pipeline_name)
    run_config["intermediate_storage"] = {
        "filesystem": {"config": {"base_dir": six.ensure_str(base_dir)}}
    }
    return run_config
Example #4
0
def construct_environment_yaml(preset_name, config, pipeline_name, module_name):
    """Build the run config for a pipeline from a preset or YAML globs.

    The pipeline definition is always resolved first, because it is needed
    both for preset lookup and for the ``can_isolate_steps`` check.

    Args:
        preset_name: Name of a pipeline preset; mutually exclusive with
            ``config``.
        config: Iterable of YAML file globs, used when no preset is given.
        pipeline_name: Pipeline name; also used as the ``fn_name`` when
            locating the repository and as the last temp-directory component.
        module_name: Module name passed along when locating the repository.

    Returns:
        The run config dict. A filesystem ``"intermediate_storage"`` entry is
        added when ``can_isolate_steps`` is false for the default mode and the
        config does not already define one.

    Raises:
        click.UsageError: If both ``preset_name`` and ``config`` are provided.
    """
    recon_repo = recon_repo_for_cli_args(
        {
            "fn_name": pipeline_name,
            "module_name": module_name,
        }
    )
    pipeline_def = recon_repo.get_definition().get_pipeline(pipeline_name)

    if not preset_name:
        yaml_globs = list(config)
        run_config = load_yaml_from_glob_list(yaml_globs) if yaml_globs else {}
    else:
        if config:
            raise click.UsageError("Can not use --preset with --config.")
        run_config = pipeline_def.get_preset(preset_name).run_config

    # Nothing to add when steps can be isolated or storage is already set.
    if (
        can_isolate_steps(pipeline_def, pipeline_def.get_default_mode())
        or "intermediate_storage" in run_config
    ):
        return run_config

    base_dir = os.path.join(
        seven.get_system_temp_directory(), "dagster-airflow", pipeline_name
    )
    run_config["intermediate_storage"] = {"filesystem": {"config": {"base_dir": base_dir}}}
    return run_config
Example #5
0
def execute_restart_command(schedule_name, all_running_flag, cli_args, print_fn):
    """Restart one running schedule, or every running schedule of the repository.

    A restart is a ``stop_schedule`` call followed by a ``start_schedule`` call.

    Args:
        schedule_name: Name of the schedule to restart; only used when
            ``all_running_flag`` is falsy.
        all_running_flag: When truthy, restart every schedule whose status is
            ``ScheduleStatus.RUNNING``.
        cli_args: CLI arguments handed to ``recon_repo_for_cli_args`` to
            locate the repository.
        print_fn: Callable that receives the confirmation message.

    Raises:
        click.UsageError: If the named schedule is not currently running, or
            if stopping/starting raises ``DagsterInvariantViolationError``.
    """
    handle = recon_repo_for_cli_args(cli_args)
    instance = DagsterInstance.get()
    check_handle_and_scheduler(handle, instance)

    repository = handle.get_definition()

    if all_running_flag:
        for schedule in instance.all_schedules(repository):
            if schedule.status == ScheduleStatus.RUNNING:
                try:
                    instance.stop_schedule(repository, schedule.name)
                    instance.start_schedule(repository, schedule.name)
                except DagsterInvariantViolationError as ex:
                    raise click.UsageError(ex)

        print_fn(
            "Restarted all running schedules for repository {name}".format(name=repository.name)
        )
    else:
        schedule = instance.get_schedule_by_name(repository, schedule_name)
        if schedule.status != ScheduleStatus.RUNNING:
            # The error used to be constructed but never raised, so a stopped
            # schedule was silently stopped again and then started.
            raise click.UsageError(
                "Cannot restart a schedule {name} because is not currently running".format(
                    name=schedule.name
                )
            )

        try:
            instance.stop_schedule(repository, schedule_name)
            instance.start_schedule(repository, schedule_name)
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)

        print_fn("Restarted schedule {schedule_name}".format(schedule_name=schedule_name))
Example #6
0
def execute_preview_command(cli_args, print_fn):
    """Print the schedule changes for the repository in preview mode.

    Args:
        cli_args: CLI arguments handed to ``recon_repo_for_cli_args`` to
            locate the repository.
        print_fn: Callable forwarded to ``print_changes`` for output.
    """
    recon_repo = recon_repo_for_cli_args(cli_args)
    dagster_instance = DagsterInstance.get()
    check_handle_and_scheduler(recon_repo, dagster_instance)

    print_changes(
        recon_repo.get_definition(), dagster_instance, print_fn, preview=True
    )
Example #7
0
def host_dagit_ui(host,
                  port,
                  storage_fallback,
                  reload_trigger=None,
                  port_lookup=True,
                  **kwargs):
    """Resolve the repository from ``kwargs`` and delegate to the repo-based host.

    Args:
        host: Forwarded unchanged.
        port: Forwarded unchanged.
        storage_fallback: Forwarded unchanged.
        reload_trigger: Forwarded unchanged; defaults to ``None``.
        port_lookup: Forwarded unchanged; defaults to ``True``.
        **kwargs: CLI arguments handed to ``recon_repo_for_cli_args``.

    Returns:
        Whatever ``host_dagit_ui_with_reconstructable_repo`` returns.
    """
    recon_repo = recon_repo_for_cli_args(kwargs)
    return host_dagit_ui_with_reconstructable_repo(
        recon_repo,
        host,
        port,
        storage_fallback,
        reload_trigger,
        port_lookup,
    )
Example #8
0
def execute_logs_command(schedule_name, cli_args, print_fn, instance=None):
    """Print the logs path of a schedule.

    Args:
        schedule_name: Name of the schedule whose logs path is printed.
        cli_args: CLI arguments handed to ``recon_repo_for_cli_args`` to
            locate the repository.
        print_fn: Callable that receives the logs path.
        instance: Optional instance to use; ``DagsterInstance.get()`` is used
            when omitted.
    """
    handle = recon_repo_for_cli_args(cli_args)
    # Honor an injected instance; it used to be overwritten unconditionally,
    # which made the parameter dead.
    if instance is None:
        instance = DagsterInstance.get()
    check_handle_and_scheduler(handle, instance)

    repository = handle.get_definition()
    repository_name = repository.name

    # NOTE(review): a single-argument os.path.join looks like a no-op for a
    # str path; kept so the printed value is unchanged - confirm and drop.
    logs_path = os.path.join(
        instance.logs_path_for_schedule(repository_name, schedule_name))
    print_fn(logs_path)
Example #9
0
def execute_wipe_command(cli_args, print_fn):
    """Wipe all schedules after an explicit typed confirmation.

    The user must type ``DELETE`` at the prompt; any other answer exits
    without calling ``wipe_all_schedules``.

    Args:
        cli_args: CLI arguments handed to ``recon_repo_for_cli_args`` to
            locate the repository.
        print_fn: Callable that receives the success message. The abort
            message goes through ``click.echo`` instead.
    """
    recon_repo = recon_repo_for_cli_args(cli_args)
    instance = DagsterInstance.get()
    check_handle_and_scheduler(recon_repo, instance)

    answer = click.prompt(
        'Are you sure you want to delete all schedules and schedule cron jobs? Type DELETE'
    )
    if answer != 'DELETE':
        click.echo('Exiting without deleting all schedules and schedule cron jobs')
        return

    instance.wipe_all_schedules()
    print_fn("Wiped all schedules and schedule cron jobs")
Example #10
0
def execute_stop_command(schedule_name, cli_args, print_fn, instance=None):
    """Stop a single schedule.

    Args:
        schedule_name: Name of the schedule to stop.
        cli_args: CLI arguments handed to ``recon_repo_for_cli_args`` to
            locate the repository.
        print_fn: Callable that receives the confirmation message.
        instance: Optional instance to use; ``DagsterInstance.get()`` is used
            when omitted.

    Raises:
        click.UsageError: If stopping the schedule raises
            ``DagsterInvariantViolationError``.
    """
    handle = recon_repo_for_cli_args(cli_args)
    # Honor an injected instance; it used to be overwritten unconditionally,
    # which made the parameter dead.
    if instance is None:
        instance = DagsterInstance.get()
    check_handle_and_scheduler(handle, instance)

    repository = handle.get_definition()

    try:
        instance.stop_schedule(repository, schedule_name)
    except DagsterInvariantViolationError as ex:
        raise click.UsageError(ex)

    print_fn("Stopped schedule {schedule_name}".format(schedule_name=schedule_name))
Example #11
0
def execute_list_command(running_filter, stopped_filter, name_filter, cli_args,
                         print_fn):
    """Print the schedules of the repository, optionally filtered by status.

    Args:
        running_filter: When truthy, list only schedules whose status is
            ``ScheduleStatus.RUNNING``. Takes precedence over
            ``stopped_filter``.
        stopped_filter: When truthy (and ``running_filter`` is falsy), list
            only schedules whose status is ``ScheduleStatus.STOPPED``.
        name_filter: When truthy, print only schedule names, with no
            repository header, status, or cron line.
        cli_args: CLI arguments handed to ``recon_repo_for_cli_args`` to
            locate the repository.
        print_fn: Callable that receives each output line.
    """
    recon_repo = recon_repo_for_cli_args(cli_args)
    instance = DagsterInstance.get()
    check_handle_and_scheduler(recon_repo, instance)

    repository = recon_repo.get_definition()
    repo_name = repository.name

    if not name_filter:
        header = 'Repository {name}'.format(name=repo_name)
        print_fn(header)
        print_fn('*' * len(header))

    if running_filter:
        wanted_status = ScheduleStatus.RUNNING
    elif stopped_filter:
        wanted_status = ScheduleStatus.STOPPED
    else:
        wanted_status = None

    schedules = instance.all_schedules(repo_name)
    if wanted_status is not None:
        schedules = [s for s in schedules if s.status == wanted_status]

    for position, schedule in enumerate(schedules):
        schedule_def = repository.get_schedule_def(schedule.name)

        # If --name filter is present, only print the schedule name
        if name_filter:
            print_fn(schedule_def.name)
            continue

        if schedule:
            status_flag = "[{status}]".format(status=schedule.status.value)
        else:
            status_flag = ""
        heading = 'Schedule: {name} {flag}'.format(
            name=schedule_def.name, flag=status_flag)

        # Separator line between consecutive entries, never before the first.
        if position:
            print_fn('*' * len(heading))

        print_fn(heading)
        print_fn('Cron Schedule: {cron_schedule}'.format(
            cron_schedule=schedule_def.cron_schedule))
def load_dagit_for_repo_cli_args(n_pipelines=1, **kwargs):
    """Build the app for the given repo CLI args and check its pipeline listing.

    Issues ``PIPELINES_OR_ERROR_QUERY`` against ``/graphql`` through the app's
    test client and asserts on the shape of the JSON response.

    Args:
        n_pipelines: Expected number of entries under
            ``data.pipelinesOrError.nodes``.
        **kwargs: CLI arguments handed to ``recon_repo_for_cli_args``.

    Returns:
        The raw response object returned by the test client.
    """
    recon_repo = recon_repo_for_cli_args(kwargs)
    app = create_app_with_reconstructable_repo(recon_repo, DagsterInstance.ephemeral())

    url = '/graphql?query={query_string}'.format(query_string=PIPELINES_OR_ERROR_QUERY)
    res = app.test_client().get(url)

    payload = json.loads(res.data.decode('utf-8'))
    assert 'data' in payload
    data = payload['data']
    assert 'pipelinesOrError' in data
    pipelines_or_error = data['pipelinesOrError']
    assert 'nodes' in pipelines_or_error
    assert len(pipelines_or_error['nodes']) == n_pipelines

    return res
Example #13
0
def execute_up_command(preview, cli_args, print_fn):
    """Print pending schedule changes and, unless previewing, reconcile them.

    Args:
        preview: When truthy, only print the changes and return.
        cli_args: CLI arguments handed to ``recon_repo_for_cli_args`` to
            locate the repository.
        print_fn: Callable forwarded to ``print_changes`` for output.

    Raises:
        click.UsageError: If ``reconcile_scheduler_state`` raises
            ``DagsterInvariantViolationError``.
    """
    recon_repo = recon_repo_for_cli_args(cli_args)
    instance = DagsterInstance.get()
    check_handle_and_scheduler(recon_repo, instance)

    repository = recon_repo.get_definition()
    # Read both values up front, before any output is produced.
    interpreter_path = sys.executable
    yaml_path = recon_repo.yaml_path

    print_changes(repository, instance, print_fn, preview=preview)

    if not preview:
        try:
            reconcile_scheduler_state(
                interpreter_path, yaml_path, repository, instance=instance
            )
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)
Example #14
0
def ui(text, file, predefined, variables, remote, output, **kwargs):
    """Execute a GraphQL document chosen from exactly one of three sources.

    Args:
        text: Inline query text; surrounding quotes and whitespace are
            stripped.
        file: Open file-like object whose ``read()`` yields the query.
        predefined: Key into ``PREDEFINED_QUERIES``.
        variables: Query variables, forwarded to the executor.
        remote: When truthy, the query is executed against this remote and
            the result is printed; otherwise it runs against the repository
            resolved from ``kwargs``.
        output: Forwarded to ``execute_query_from_cli`` for local execution.
        **kwargs: CLI arguments handed to ``recon_repo_for_cli_args``.

    Raises:
        click.UsageError: Unless exactly one of ``text``, ``file`` and
            ``predefined`` is not ``None``.
    """
    provided = [src for src in (text, file, predefined) if src is not None]
    if len(provided) != 1:
        raise click.UsageError(
            'Must select one and only one of text (-t), file (-f), or predefined (-p) '
            'to select GraphQL document to execute.')

    if text is not None:
        query = text.strip('\'" \n\t')
    elif file is not None:
        query = file.read()
    else:
        query = PREDEFINED_QUERIES[predefined]

    if not remote:
        execute_query_from_cli(
            recon_repo_for_cli_args(kwargs), query, variables, output)
        return

    print(execute_query_against_remote(remote, query, variables))
Example #15
0
def construct_environment_yaml(preset_name, config, pipeline_name, module_name):
    """Build the run config for a pipeline from a preset or YAML globs.

    The pipeline definition is resolved up front, even when no preset is used.

    Args:
        preset_name: Name of a pipeline preset; mutually exclusive with
            ``config``.
        config: Iterable of YAML file globs, used when no preset is given.
        pipeline_name: Pipeline name; also used as the ``fn_name`` when
            locating the repository.
        module_name: Module name passed along when locating the repository.

    Returns:
        The preset's ``run_config``, the config loaded from the globs, or an
        empty dict when neither a preset nor any glob is given.

    Raises:
        click.UsageError: If both ``preset_name`` and ``config`` are provided.
    """
    recon_repo = recon_repo_for_cli_args(
        {
            "fn_name": pipeline_name,
            "module_name": module_name,
        }
    )
    pipeline_def = recon_repo.get_definition().get_pipeline(pipeline_name)

    if not preset_name:
        yaml_globs = list(config)
        return load_yaml_from_glob_list(yaml_globs) if yaml_globs else {}

    if config:
        raise click.UsageError("Can not use --preset with --config.")

    return pipeline_def.get_preset(preset_name).run_config
Example #16
0
def execute_list_command(cli_args, print_fn):
    """Print every pipeline of the repository with its description and solids.

    Args:
        cli_args: CLI arguments handed to ``recon_repo_for_cli_args`` to
            locate the repository.
        print_fn: Callable that receives each output line.
    """
    repo_def = recon_repo_for_cli_args(cli_args).get_definition()

    header = 'Repository {name}'.format(name=repo_def.name)
    print_fn(header)
    print_fn('*' * len(header))

    for position, pipeline in enumerate(repo_def.get_all_pipelines()):
        heading = 'Pipeline: {name}'.format(name=pipeline.name)

        # Separator line between consecutive pipelines, never before the first.
        if position:
            print_fn('*' * len(heading))

        print_fn(heading)
        if pipeline.description:
            print_fn('Description:')
            print_fn(format_description(pipeline.description, indent=' ' * 4))

        print_fn('Solids: (Execution Order)')
        for solid in pipeline.solids_in_topological_order:
            print_fn('    ' + solid.name)
Example #17
0
def execute_backfill_command(cli_args, print_fn, instance=None):
    """Resolve a pipeline and partition set, then launch one run per partition.

    Args:
        cli_args: Dict of CLI options. ``'pipeline_name'`` is popped from it
            (the caller's dict is mutated); ``'noprompt'`` and
            ``'partition_set'`` are read; the dict is also passed to
            ``gen_partitions_from_args`` and ``get_tags_from_args``.
        print_fn: Callable that receives progress and summary messages.
        instance: Optional instance to use; ``DagsterInstance.get()`` is used
            when omitted or falsy.

    Raises:
        click.UsageError: If no pipeline or partition set can be resolved
            (missing with ``noprompt`` set, unknown name, or the pipeline has
            no partition sets).
    """
    pipeline_name = cli_args.pop('pipeline_name')
    repo_args = {k: v for k, v in cli_args.items() if k in REPO_ARG_NAMES}
    # A one-element sequence of names is unwrapped to its single name.
    if pipeline_name and not isinstance(pipeline_name, six.string_types):
        if len(pipeline_name) == 1:
            pipeline_name = pipeline_name[0]

    instance = instance or DagsterInstance.get()
    recon_repo = recon_repo_for_cli_args(repo_args)
    repo_def = recon_repo.get_definition()
    noprompt = cli_args.get('noprompt')

    # Resolve pipeline
    if not pipeline_name and noprompt:
        raise click.UsageError('No pipeline specified')
    if not pipeline_name:
        pipeline_name = click.prompt(
            'Select a pipeline to backfill: {}'.format(', '.join(repo_def.pipeline_names))
        )
    if not repo_def.has_pipeline(pipeline_name):
        raise click.UsageError('No pipeline found named `{}`'.format(pipeline_name))

    pipeline_def = repo_def.get_pipeline(pipeline_name)

    # Resolve partition set
    all_partition_sets = repo_def.partition_set_defs + [
        schedule_def.get_partition_set()
        for schedule_def in repo_def.schedule_defs
        if isinstance(schedule_def, PartitionScheduleDefinition)
    ]

    pipeline_partition_sets = [
        x for x in all_partition_sets if x.pipeline_name == pipeline_def.name
    ]
    if not pipeline_partition_sets:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(pipeline_def.name)
        )
    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        if len(pipeline_partition_sets) == 1:
            partition_set_name = pipeline_partition_sets[0].name
        elif noprompt:
            raise click.UsageError('No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(
                    ', '.join(x.name for x in pipeline_partition_sets)
                )
            )
    partition_set = next((x for x in pipeline_partition_sets if x.name == partition_set_name), None)
    if not partition_set:
        raise click.UsageError('No partition set found named `{}`'.format(partition_set_name))

    # Resolve partitions to backfill
    partitions = gen_partitions_from_args(partition_set, cli_args)

    # Print backfill info
    print_fn('\n     Pipeline: {}'.format(pipeline_def.name))
    print_fn('Partition set: {}'.format(partition_set.name))
    print_fn('   Partitions: {}\n'.format(print_partition_format(partitions, indent_level=15)))

    # This whole CLI tool should move to more of a "host process" model - but this is how we start
    repo_location = InProcessRepositoryLocation(recon_repo)
    # No trailing comma here: a trailing comma inside the parentheses would
    # turn this into a 1-tuple instead of the pipeline object itself.
    external_pipeline = repo_location.get_repository(repo_def.name).get_full_external_pipeline(
        pipeline_name
    )

    # Confirm and launch
    if noprompt or click.confirm(
        'Do you want to proceed with the backfill ({} partitions)?'.format(len(partitions))
    ):

        print_fn('Launching runs... ')
        backfill_id = make_new_backfill_id()

        run_tags = merge_dicts(
            PipelineRun.tags_for_backfill_id(backfill_id), get_tags_from_args(cli_args),
        )

        for partition in partitions:
            run = instance.create_run_for_pipeline(
                pipeline_def=pipeline_def,
                mode=partition_set.mode,
                solids_to_execute=frozenset(partition_set.solid_selection)
                if partition_set and partition_set.solid_selection
                else None,
                environment_dict=partition_set.environment_dict_for_partition(partition),
                tags=merge_dicts(partition_set.tags_for_partition(partition), run_tags),
            )

            instance.launch_run(run.run_id, external_pipeline)
            # Remove once we can handle synchronous execution... currently limited by sqlite
            time.sleep(0.1)

        print_fn('Launched backfill job `{}`'.format(backfill_id))
    else:
        print_fn(' Aborted!')
Example #18
0
def repository_snapshot_command(output_file, **kwargs):
    """Write the external repository data for the resolved repository.

    Args:
        output_file: Destination passed to ``ipc_write_unary_response``.
        **kwargs: CLI arguments handed to ``recon_repo_for_cli_args``.
    """
    repo_def = recon_repo_for_cli_args(kwargs).get_definition()
    snapshot = external_repository_data_from_def(repo_def)
    ipc_write_unary_response(output_file, snapshot)
Example #19
0
def repository_snapshot_command(**kwargs):
    """Echo the serialized external repository data for the resolved repository.

    Args:
        **kwargs: CLI arguments handed to ``recon_repo_for_cli_args``.
    """
    repo_def = recon_repo_for_cli_args(kwargs).get_definition()
    serialized = serialize_dagster_namedtuple(
        external_repository_data_from_def(repo_def))
    click.echo(serialized)
Example #20
0
def execute_backfill_command(cli_args, print_fn, instance=None):
    """Resolve a pipeline and partition set, then launch one run per partition.

    Args:
        cli_args: Dict of CLI options. ``'pipeline_name'`` is popped from it
            (the caller's dict is mutated); ``'noprompt'`` and
            ``'partition_set'`` are read; the dict is also passed to
            ``gen_partitions_from_args`` and ``get_tags_from_args``.
        print_fn: Callable that receives progress and summary messages.
        instance: Optional instance to use; ``DagsterInstance.get()`` is used
            when omitted or falsy.

    Raises:
        click.UsageError: If the instance has no run launcher, or if no
            pipeline or partition set can be resolved (missing with
            ``noprompt`` set, unknown name, or the pipeline has no partition
            sets).
    """
    pipeline_name = cli_args.pop('pipeline_name')
    repo_args = {k: v for k, v in cli_args.items() if k in REPO_ARG_NAMES}
    # A one-element sequence of names is unwrapped to its single name.
    if pipeline_name and not isinstance(pipeline_name, six.string_types):
        if len(pipeline_name) == 1:
            pipeline_name = pipeline_name[0]

    instance = instance or DagsterInstance.get()
    handle = recon_repo_for_cli_args(repo_args)
    repository = handle.get_definition()
    noprompt = cli_args.get('noprompt')

    # check run launcher
    if not instance.run_launcher:
        # Adjacent literals are concatenated as-is, so each fragment carries
        # its own trailing space.
        raise click.UsageError(
            'A run launcher must be configured before running a backfill. You can configure a run '
            'launcher (e.g. dagster_graphql.launcher.RemoteDagitRunLauncher) in your instance '
            '`dagster.yaml` settings. See '
            'https://docs.dagster.io/docs/deploying/instance/ for more '
            'information.')

    # Resolve pipeline
    if not pipeline_name and noprompt:
        raise click.UsageError('No pipeline specified')
    if not pipeline_name:
        pipeline_name = click.prompt(
            'Select a pipeline to backfill: {}'.format(', '.join(
                repository.pipeline_names)))
    if not repository.has_pipeline(pipeline_name):
        raise click.UsageError(
            'No pipeline found named `{}`'.format(pipeline_name))

    pipeline = repository.get_pipeline(pipeline_name)

    # Resolve partition set
    all_partition_sets = get_partition_sets_for_handle(handle)
    pipeline_partition_sets = [
        x for x in all_partition_sets if x.pipeline_name == pipeline.name
    ]
    if not pipeline_partition_sets:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(pipeline.name))
    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        if len(pipeline_partition_sets) == 1:
            partition_set_name = pipeline_partition_sets[0].name
        elif noprompt:
            raise click.UsageError(
                'No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(
                    ', '.join(x.name for x in pipeline_partition_sets)))
    partition_set = next(
        (x for x in pipeline_partition_sets if x.name == partition_set_name),
        None)
    if not partition_set:
        raise click.UsageError(
            'No partition set found named `{}`'.format(partition_set_name))

    # Resolve partitions to backfill
    partitions = gen_partitions_from_args(partition_set, cli_args)

    # Print backfill info
    print_fn('\n     Pipeline: {}'.format(pipeline.name))
    print_fn('Partition set: {}'.format(partition_set.name))
    print_fn('   Partitions: {}\n'.format(
        print_partition_format(partitions, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
            'Do you want to proceed with the backfill ({} partitions)?'.format(
                len(partitions))):

        print_fn('Launching runs... ')
        backfill_id = make_new_backfill_id()

        run_tags = merge_dicts(
            PipelineRun.tags_for_backfill_id(backfill_id),
            get_tags_from_args(cli_args),
        )

        for partition in partitions:
            run = instance.create_run_for_pipeline(
                pipeline_def=pipeline,
                mode=partition_set.mode,
                solid_subset=partition_set.solid_subset,
                environment_dict=partition_set.environment_dict_for_partition(
                    partition),
                tags=merge_dicts(partition_set.tags_for_partition(partition),
                                 run_tags),
            )
            instance.launch_run(run.run_id)
            # Remove once we can handle synchronous execution... currently limited by sqlite
            time.sleep(0.1)

        print_fn('Launched backfill job `{}`'.format(backfill_id))
    else:
        print_fn(' Aborted!')
Example #21
0
def execute_run_command(output_file, pipeline_run_id, instance_ref, **kwargs):
    """Resolve the repository from ``kwargs`` and delegate to the run body.

    Args:
        output_file: Forwarded unchanged.
        pipeline_run_id: Forwarded unchanged.
        instance_ref: Forwarded unchanged.
        **kwargs: CLI arguments handed to ``recon_repo_for_cli_args``.

    Returns:
        Whatever ``_execute_run_command_body`` returns.
    """
    return _execute_run_command_body(
        output_file,
        recon_repo_for_cli_args(kwargs),
        pipeline_run_id,
        instance_ref,
    )