def create_and_launch_partition_backfill(graphene_info, backfill_params):
    """Resolve a partition set from GraphQL backfill params and launch a backfill.

    Normally persists a ``PartitionBackfill`` in ``REQUESTED`` state for the backfill
    daemon to pick up. When ``forceSynchronousSubmission`` is set (test-only per the
    original comment), runs are submitted synchronously in chunks of
    ``BACKFILL_CHUNK_SIZE`` instead.

    Returns:
        GrapheneLaunchBackfillSuccess on success, or
        GraphenePartitionSetNotFoundError if the named partition set does not exist.
    """
    from ...schema.backfill import GrapheneLaunchBackfillSuccess
    from ...schema.errors import GraphenePartitionSetNotFoundError

    partition_set_selector = backfill_params.get("selector")
    partition_set_name = partition_set_selector.get("partitionSetName")
    repository_selector = RepositorySelector.from_graphql_input(
        partition_set_selector.get("repositorySelector"))
    location = graphene_info.context.get_repository_location(
        repository_selector.location_name)
    repository = location.get_repository(repository_selector.repository_name)
    matches = [
        partition_set
        for partition_set in repository.get_external_partition_sets()
        # use the name bound above instead of re-fetching from the selector per item
        if partition_set.name == partition_set_name
    ]
    if not matches:
        return GraphenePartitionSetNotFoundError(partition_set_name)
    check.invariant(
        len(matches) == 1,
        "Partition set names must be unique: found {num} matches for {partition_set_name}"
        .format(num=len(matches), partition_set_name=partition_set_name),
    )
    external_partition_set = next(iter(matches))

    partition_names = backfill_params.get("partitionNames")
    backfill_id = make_new_backfill_id()
    backfill = PartitionBackfill(
        backfill_id=backfill_id,
        partition_set_origin=external_partition_set.get_external_origin(),
        status=BulkActionStatus.REQUESTED,
        partition_names=partition_names,
        from_failure=bool(backfill_params.get("fromFailure")),
        reexecution_steps=backfill_params.get("reexecutionSteps"),
        tags={t["key"]: t["value"] for t in backfill_params.get("tags", [])},
        backfill_timestamp=pendulum.now("UTC").timestamp(),
    )

    if backfill_params.get("forceSynchronousSubmission"):
        # should only be used in a test situation
        to_submit = list(partition_names)  # idiomatic copy (was a redundant comprehension)
        submitted_run_ids = []
        while to_submit:
            # Submit in fixed-size chunks to bound the work done per call.
            chunk = to_submit[:BACKFILL_CHUNK_SIZE]
            to_submit = to_submit[BACKFILL_CHUNK_SIZE:]
            submitted_run_ids.extend(
                run_id
                for run_id in submit_backfill_runs(
                    graphene_info.context.instance, location, backfill,
                    partition_names=chunk)
                if run_id is not None  # fixed: identity check for None (was `!= None`)
            )
        return GrapheneLaunchBackfillSuccess(
            backfill_id=backfill_id, launched_run_ids=submitted_run_ids)

    # Default path: enqueue the backfill for the daemon.
    graphene_info.context.instance.add_backfill(backfill)
    return GrapheneLaunchBackfillSuccess(backfill_id=backfill_id)
def create_and_launch_partition_backfill(graphene_info, backfill_params):
    """Resolve a partition set from GraphQL backfill params and launch a backfill.

    Behavior depends on the instance's ``backfill`` settings: when
    ``daemon_enabled`` is set, the backfill is enqueued for the daemon (after
    verifying the bulk-actions table exists); otherwise runs are submitted
    synchronously via ``submit_backfill_runs``.

    Returns:
        GraphenePartitionBackfillSuccess on success, or
        GraphenePartitionSetNotFoundError if the named partition set does not exist.
    """
    from ...schema.backfill import GraphenePartitionBackfillSuccess
    from ...schema.errors import GraphenePartitionSetNotFoundError

    partition_set_selector = backfill_params.get("selector")
    partition_set_name = partition_set_selector.get("partitionSetName")
    repository_selector = RepositorySelector.from_graphql_input(
        partition_set_selector.get("repositorySelector"))
    location = graphene_info.context.get_repository_location(
        repository_selector.location_name)
    repository = location.get_repository(repository_selector.repository_name)
    matches = [
        partition_set
        for partition_set in repository.get_external_partition_sets()
        # use the name bound above instead of re-fetching from the selector per item
        if partition_set.name == partition_set_name
    ]
    if not matches:
        return GraphenePartitionSetNotFoundError(partition_set_name)
    check.invariant(
        len(matches) == 1,
        "Partition set names must be unique: found {num} matches for {partition_set_name}"
        .format(num=len(matches), partition_set_name=partition_set_name),
    )
    external_partition_set = next(iter(matches))

    partition_names = backfill_params.get("partitionNames")
    backfill_id = make_new_backfill_id()
    backfill = PartitionBackfill(
        backfill_id=backfill_id,
        partition_set_origin=external_partition_set.get_external_origin(),
        status=BulkActionStatus.REQUESTED,
        partition_names=partition_names,
        from_failure=bool(backfill_params.get("fromFailure")),
        reexecution_steps=backfill_params.get("reexecutionSteps"),
        tags={t["key"]: t["value"] for t in backfill_params.get("tags", [])},
        backfill_timestamp=pendulum.now("UTC").timestamp(),
    )

    backfill_settings = graphene_info.context.instance.get_settings(
        "backfill") or {}
    # renamed from camelCase `daemonEnabled` to follow PEP 8
    daemon_enabled = backfill_settings.get("daemon_enabled")

    if daemon_enabled and not graphene_info.context.instance.has_bulk_actions_table():
        # Daemon-based backfills require the bulk actions storage table.
        check.failed(
            "A schema migration is required before daemon-based backfills can be supported. "
            "Try running `dagster instance migrate` to migrate your instance and try again."
        )
    elif daemon_enabled:
        # Enqueue for the daemon; it will submit the runs asynchronously.
        graphene_info.context.instance.add_backfill(backfill)
        return GraphenePartitionBackfillSuccess(backfill_id=backfill_id)
    else:
        # Daemon disabled: submit all runs synchronously in-process.
        submitted_run_ids = submit_backfill_runs(
            graphene_info.context.instance, location, backfill)
        return GraphenePartitionBackfillSuccess(
            backfill_id=backfill_id, launched_run_ids=submitted_run_ids)
def execute_partition_set(partition_set, partition_filter, instance=None):
    '''Programmatically perform a backfill over a partition set

    Arguments:
        partition_set (PartitionSet): The base partition set to run the backfill over
        partition_filter (Callable[[List[Partition]]], List[Partition]): A function that takes
            a list of partitions and returns a filtered list of partitions to run the backfill
            over.
        instance (Optional[DagsterInstance]): The instance to use to perform the backfill.
            Defaults to an ephemeral instance.
    '''
    check.inst_param(partition_set, 'partition_set', PartitionSetDefinition)
    check.callable_param(partition_filter, 'partition_filter')
    # Fixed: `instance` defaults to None, so it must be validated as optional —
    # plain inst_param would reject the documented default.
    check.opt_inst_param(instance, 'instance', DagsterInstance)

    candidate_partitions = partition_set.get_partitions()
    partitions = partition_filter(candidate_partitions)

    instance = instance or DagsterInstance.ephemeral()

    # Fixed: generate ONE backfill id for the whole backfill so all launched runs
    # are grouped together (previously a fresh id was minted per partition,
    # defeating the grouping tag).
    backfill_id = make_new_backfill_id()

    for partition in partitions:
        run = PipelineRun(
            pipeline_name=partition_set.pipeline_name,
            run_id=make_new_run_id(),
            selector=ExecutionSelector(partition_set.pipeline_name),
            environment_dict=partition_set.environment_dict_for_partition(
                partition),
            mode='default',
            tags=merge_dicts(
                PipelineRun.tags_for_backfill_id(backfill_id),
                partition_set.tags_for_partition(partition),
            ),
            status=PipelineRunStatus.NOT_STARTED,
        )

        # Remove once we can handle synchronous execution... currently limited by sqlite
        time.sleep(0.1)

        instance.launch_run(run)
def _execute_backfill_command_at_location(cli_args, print_fn, instance, workspace, repo_location):
    """Run the CLI backfill flow against a single (already-loaded) repository location.

    Resolves the repository, pipeline, and partition set from ``cli_args`` (prompting
    interactively unless ``noprompt``), confirms with the user, then creates and
    submits one run per selected partition, recording the backfill's lifecycle
    (REQUESTED -> FAILED/COMPLETED) on the instance.
    """
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get("repository")
    )
    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        cli_args.get("pipeline"),
    )
    noprompt = cli_args.get("noprompt")

    # Map partition-set name -> external partition set, restricted to this pipeline.
    pipeline_partition_set_names = {
        external_partition_set.name: external_partition_set
        for external_partition_set in external_repo.get_external_partition_sets()
        if external_partition_set.pipeline_name == external_pipeline.name
    }
    if not pipeline_partition_set_names:
        raise click.UsageError(
            "No partition sets found for pipeline `{}`".format(external_pipeline.name)
        )
    partition_set_name = cli_args.get("partition_set")
    if not partition_set_name:
        # No --partition-set given: auto-select when unambiguous, otherwise prompt
        # (or fail fast under --noprompt).
        if len(pipeline_partition_set_names) == 1:
            partition_set_name = next(iter(pipeline_partition_set_names.keys()))
        elif noprompt:
            raise click.UsageError("No partition set specified (see option `--partition-set`)")
        else:
            partition_set_name = click.prompt(
                "Select a partition set to use for backfill: {}".format(
                    ", ".join(x for x in pipeline_partition_set_names.keys())
                )
            )
    partition_set = pipeline_partition_set_names.get(partition_set_name)
    if not partition_set:
        raise click.UsageError("No partition set found named `{}`".format(partition_set_name))
    run_tags = get_tags_from_args(cli_args)

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location=repo_location,
    )

    try:
        # Partition names come from user code in the repo location; any failure
        # there is wrapped into DagsterBackfillFailedError with the serialized
        # error attached.
        partition_names_or_error = repo_location.get_external_partition_names(
            repo_handle, partition_set_name,
        )
    except Exception:  # pylint: disable=broad-except
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        raise DagsterBackfillFailedError(
            "Failure fetching partition names for {partition_set_name}: {error_message}".format(
                partition_set_name=partition_set_name,
                error_message=error_info.message,
            ),
            serialized_error_info=error_info,
        )

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args
    )

    # Print backfill info
    print_fn("\n Pipeline: {}".format(external_pipeline.name))
    print_fn("Partition set: {}".format(partition_set_name))
    print_fn(" Partitions: {}\n".format(print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
        "Do you want to proceed with the backfill ({} partitions)?".format(len(partition_names))
    ):
        print_fn("Launching runs... ")
        backfill_id = make_new_backfill_id()
        backfill_job = PartitionBackfill(
            backfill_id=backfill_id,
            partition_set_origin=partition_set.get_external_origin(),
            status=BulkActionStatus.REQUESTED,
            partition_names=partition_names,
            from_failure=False,
            reexecution_steps=None,
            tags=run_tags,
            backfill_timestamp=pendulum.now("UTC").timestamp(),
        )
        try:
            # Batch-fetch run config + tags for every partition in one call.
            partition_execution_data = (
                repo_location.get_external_partition_set_execution_param_data(
                    repository_handle=repo_handle,
                    partition_set_name=partition_set_name,
                    partition_names=partition_names,
                )
            )
        except Exception:  # pylint: disable=broad-except
            # Record the failed backfill on the instance before reporting;
            # returning print_fn(...) returns None (print side effect only).
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            instance.add_backfill(
                backfill_job.with_status(BulkActionStatus.FAILED).with_error(error_info)
            )
            return print_fn("Backfill failed: {}".format(error_info))

        assert isinstance(partition_execution_data, ExternalPartitionSetExecutionParamData)

        for partition_data in partition_execution_data.partition_data:
            # create_backfill_run may return None (run skipped); only submit real runs.
            pipeline_run = create_backfill_run(
                instance,
                repo_location,
                external_pipeline,
                partition_set,
                backfill_job,
                partition_data,
            )
            if pipeline_run:
                instance.submit_run(pipeline_run.run_id, workspace)

        instance.add_backfill(backfill_job.with_status(BulkActionStatus.COMPLETED))
        print_fn("Launched backfill job `{}`".format(backfill_id))
    else:
        print_fn("Aborted!")
def execute_backfill_command(cli_args, print_fn, instance=None):
    """Run the `dagster pipeline backfill` CLI command.

    Resolves the pipeline and partition set from ``cli_args`` (prompting
    interactively unless ``noprompt``), confirms with the user, then creates and
    launches one run per selected partition, all tagged with a shared backfill id.

    Arguments:
        cli_args (dict): Parsed CLI options.
        print_fn (Callable[[str], None]): Output sink (normally click.echo).
        instance (Optional[DagsterInstance]): Defaults to ``DagsterInstance.get()``.
    """
    instance = instance or DagsterInstance.get()
    external_pipeline = get_external_pipeline_from_kwargs(cli_args, instance)
    external_repository = get_external_repository_from_kwargs(cli_args, instance)

    # We should move this to use external repository
    # https://github.com/dagster-io/dagster/issues/2556
    recon_repo = recon_repo_from_external_repo(external_repository)
    repo_def = recon_repo.get_definition()

    noprompt = cli_args.get('noprompt')

    pipeline_def = repo_def.get_pipeline(external_pipeline.name)

    # Resolve partition set: explicit partition set defs plus those implied by
    # partition-based schedules.
    all_partition_sets = repo_def.partition_set_defs + [
        schedule_def.get_partition_set()
        for schedule_def in repo_def.schedule_defs
        if isinstance(schedule_def, PartitionScheduleDefinition)
    ]

    pipeline_partition_sets = [
        x for x in all_partition_sets if x.pipeline_name == pipeline_def.name
    ]
    if not pipeline_partition_sets:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(pipeline_def.name)
        )
    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        # Auto-select when unambiguous, otherwise prompt (or fail under --noprompt).
        if len(pipeline_partition_sets) == 1:
            partition_set_name = pipeline_partition_sets[0].name
        elif noprompt:
            raise click.UsageError('No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(
                    ', '.join(x.name for x in pipeline_partition_sets)
                )
            )
    partition_set = next((x for x in pipeline_partition_sets if x.name == partition_set_name), None)
    if not partition_set:
        raise click.UsageError('No partition set found named `{}`'.format(partition_set_name))

    # Resolve partitions to backfill
    partitions = gen_partitions_from_args(partition_set, cli_args)

    # Print backfill info
    print_fn('\n Pipeline: {}'.format(pipeline_def.name))
    print_fn('Partition set: {}'.format(partition_set.name))
    print_fn(' Partitions: {}\n'.format(print_partition_format(partitions, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
        'Do you want to proceed with the backfill ({} partitions)?'.format(len(partitions))
    ):
        print_fn('Launching runs... ')
        backfill_id = make_new_backfill_id()
        run_tags = merge_dicts(
            PipelineRun.tags_for_backfill_id(backfill_id),
            get_tags_from_args(cli_args),
        )
        for partition in partitions:
            run = instance.create_run_for_pipeline(
                pipeline_def=pipeline_def,
                mode=partition_set.mode,
                # `partition_set` is guaranteed non-None here (UsageError above),
                # so the old `partition_set and ...` guard was dead code.
                solids_to_execute=frozenset(partition_set.solid_selection)
                if partition_set.solid_selection
                else None,
                run_config=partition_set.run_config_for_partition(partition),
                tags=merge_dicts(partition_set.tags_for_partition(partition), run_tags),
            )

            instance.launch_run(run.run_id, external_pipeline)
            # Remove once we can handle synchronous execution... currently limited by sqlite
            time.sleep(0.1)

        print_fn('Launched backfill job `{}`'.format(backfill_id))
    else:
        print_fn(' Aborted!')
def _execute_backfill_command_at_location(cli_args, print_fn, instance, repo_location):
    """Run the CLI backfill flow against a single (already-loaded) repository location.

    Resolves the repository, pipeline, and partition set from ``cli_args`` (prompting
    interactively unless ``noprompt``), batch-fetches execution params for every
    selected partition, then creates and launches one run per partition.
    """
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get("repository")
    )
    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        cli_args.get("pipeline"),
    )
    noprompt = cli_args.get("noprompt")

    # Map partition-set name -> external partition set, restricted to this pipeline.
    pipeline_partition_set_names = {
        external_partition_set.name: external_partition_set
        for external_partition_set in external_repo.get_external_partition_sets()
        if external_partition_set.pipeline_name == external_pipeline.name
    }
    if not pipeline_partition_set_names:
        raise click.UsageError(
            "No partition sets found for pipeline `{}`".format(external_pipeline.name)
        )
    partition_set_name = cli_args.get("partition_set")
    if not partition_set_name:
        # Auto-select when unambiguous, otherwise prompt (or fail under --noprompt).
        if len(pipeline_partition_set_names) == 1:
            partition_set_name = next(iter(pipeline_partition_set_names.keys()))
        elif noprompt:
            raise click.UsageError("No partition set specified (see option `--partition-set`)")
        else:
            partition_set_name = click.prompt(
                "Select a partition set to use for backfill: {}".format(
                    ", ".join(x for x in pipeline_partition_set_names.keys())
                )
            )
    partition_set = pipeline_partition_set_names.get(partition_set_name)
    if not partition_set:
        raise click.UsageError("No partition set found named `{}`".format(partition_set_name))

    mode = partition_set.mode
    solid_selection = partition_set.solid_selection
    run_tags = get_tags_from_args(cli_args)

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location_handle=repo_location.location_handle,
    )

    # Resolve partitions to backfill; errors from user code surface as
    # ExternalPartitionExecutionErrorData rather than exceptions here.
    partition_names_or_error = repo_location.get_external_partition_names(
        repo_handle, partition_set_name,
    )
    if isinstance(partition_names_or_error, ExternalPartitionExecutionErrorData):
        raise DagsterBackfillFailedError(
            "Failure fetching partition names for {partition_set_name}: {error_message}".format(
                partition_set_name=partition_set_name,
                error_message=partition_names_or_error.error.message,
            ),
            serialized_error_info=partition_names_or_error.error,
        )

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args
    )

    # Print backfill info
    print_fn("\n Pipeline: {}".format(external_pipeline.name))
    print_fn("Partition set: {}".format(partition_set_name))
    print_fn(" Partitions: {}\n".format(print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
        "Do you want to proceed with the backfill ({} partitions)?".format(len(partition_names))
    ):
        print_fn("Launching runs... ")
        backfill_id = make_new_backfill_id()
        backfill_tags = PipelineRun.tags_for_backfill_id(backfill_id)

        # Batch-fetch run config + tags for every partition in one call.
        partition_execution_data = repo_location.get_external_partition_set_execution_param_data(
            repository_handle=repo_handle,
            partition_set_name=partition_set_name,
            partition_names=partition_names,
        )

        if isinstance(partition_execution_data, ExternalPartitionExecutionErrorData):
            # Returns None (print side effect only) — backfill is abandoned.
            return print_fn("Backfill failed: {}".format(partition_execution_data.error))

        assert isinstance(partition_execution_data, ExternalPartitionSetExecutionParamData)

        for partition_data in partition_execution_data.partition_data:
            # Tag precedence: CLI run_tags override backfill tags, which
            # override partition-supplied tags (later merge args win).
            run = _create_external_pipeline_run(
                instance=instance,
                repo_location=repo_location,
                external_repo=external_repo,
                external_pipeline=external_pipeline,
                run_config=partition_data.run_config,
                mode=mode,
                preset=None,
                tags=merge_dicts(merge_dicts(partition_data.tags, backfill_tags), run_tags),
                solid_selection=frozenset(solid_selection) if solid_selection else None,
            )

            instance.launch_run(run.run_id, external_pipeline)

        print_fn("Launched backfill job `{}`".format(backfill_id))
    else:
        print_fn("Aborted!")
def execute_backfill_command(cli_args, print_fn, instance=None):
    """Run the `dagster pipeline backfill` CLI command (legacy handle-based path).

    Resolves the pipeline and partition set from ``cli_args`` (prompting
    interactively unless ``noprompt``), confirms with the user, then builds and
    launches one ``PipelineRun`` per selected partition via the instance's
    configured run launcher.

    Arguments:
        cli_args (dict): Parsed CLI options. ``pipeline_name`` is popped.
        print_fn (Callable[[str], None]): Output sink (normally click.echo).
        instance (Optional[DagsterInstance]): Defaults to ``DagsterInstance.get()``.
    """
    pipeline_name = cli_args.pop('pipeline_name')
    repo_args = {k: v for k, v in cli_args.items() if k in REPO_ARG_NAMES}
    # Click may deliver pipeline_name as a 1-tuple; unwrap it.
    if pipeline_name and not isinstance(pipeline_name, six.string_types):
        if len(pipeline_name) == 1:
            pipeline_name = pipeline_name[0]

    instance = instance or DagsterInstance.get()
    handle = handle_for_repo_cli_args(repo_args)
    repository = handle.build_repository_definition()
    noprompt = cli_args.get('noprompt')

    # check run launcher
    if not instance.run_launcher:
        raise click.UsageError(
            'A run launcher must be configured before running a backfill. You can configure a run '
            'launcher (e.g. dagster_graphql.launcher.RemoteDagitRunLauncher) in your instance '
            '`dagster.yaml` settings. See '
            # fixed: missing space made the message read "for moreinformation."
            'https://docs.dagster.io/latest/deploying/instance/ for more '
            'information.')

    # Resolve pipeline
    if not pipeline_name and noprompt:
        raise click.UsageError('No pipeline specified')
    if not pipeline_name:
        pipeline_name = click.prompt(
            'Select a pipeline to backfill: {}'.format(', '.join(
                repository.pipeline_names)))
    # (removed a redundant second handle.build_repository_definition() call)
    if not repository.has_pipeline(pipeline_name):
        raise click.UsageError(
            'No pipeline found named `{}`'.format(pipeline_name))

    pipeline = repository.get_pipeline(pipeline_name)

    # Resolve partition set
    all_partition_sets = get_partition_sets_for_handle(handle)
    pipeline_partition_sets = [
        x for x in all_partition_sets if x.pipeline_name == pipeline.name
    ]
    if not pipeline_partition_sets:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(pipeline.name))
    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        # Auto-select when unambiguous, otherwise prompt (or fail under --noprompt).
        if len(pipeline_partition_sets) == 1:
            partition_set_name = pipeline_partition_sets[0].name
        elif noprompt:
            raise click.UsageError(
                'No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(
                    ', '.join(x.name for x in pipeline_partition_sets)))
    partition_set = next(
        (x for x in pipeline_partition_sets if x.name == partition_set_name),
        None)
    if not partition_set:
        raise click.UsageError(
            'No partition set found named `{}`'.format(partition_set_name))

    # Resolve partitions to backfill
    partitions = gen_partitions_from_args(partition_set, cli_args)

    # Resolve priority
    celery_priority = get_backfill_priority_from_args(cli_args)

    # Print backfill info
    print_fn('\n Pipeline: {}'.format(pipeline.name))
    print_fn('Partition set: {}'.format(partition_set.name))
    print_fn(' Partitions: {}\n'.format(
        print_partition_format(partitions, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
            'Do you want to proceed with the backfill ({} partitions)?'.format(
                len(partitions))):
        print_fn('Launching runs... ')
        backfill_id = make_new_backfill_id()
        run_tags = merge_dicts(
            PipelineRun.tags_for_backfill_id(backfill_id),
            get_tags_from_args(cli_args),
        )

        # for backwards compatibility - remove once prezi switched over to using tags argument
        if celery_priority is not None:
            run_tags['dagster-celery/run_priority'] = celery_priority

        for partition in partitions:
            run = PipelineRun(
                pipeline_name=pipeline.name,
                run_id=make_new_run_id(),
                selector=ExecutionSelector(pipeline.name),
                environment_dict=partition_set.environment_dict_for_partition(
                    partition),
                mode=cli_args.get('mode') or 'default',
                tags=merge_dicts(partition_set.tags_for_partition(partition),
                                 run_tags),
                status=PipelineRunStatus.NOT_STARTED,
            )

            instance.launch_run(run)
            # Remove once we can handle synchronous execution... currently limited by sqlite
            time.sleep(0.1)

        print_fn('Launched backfill job `{}`'.format(backfill_id))
    else:
        print_fn(' Aborted!')
def create_and_launch_partition_backfill(graphene_info, backfill_params):
    """Resolve a partition set from GraphQL backfill params and synchronously launch
    one run per partition.

    Returns the schema's ``PartitionBackfillSuccess`` type on success,
    ``PartitionSetNotFoundError`` if the named partition set does not exist, or
    ``PythonError`` if fetching execution params from user code fails.
    """
    partition_set_selector = backfill_params.get("selector")
    partition_set_name = partition_set_selector.get("partitionSetName")
    repository_selector = RepositorySelector.from_graphql_input(
        partition_set_selector.get("repositorySelector"))
    location = graphene_info.context.get_repository_location(
        repository_selector.location_name)
    repository = location.get_repository(repository_selector.repository_name)
    matches = [
        partition_set
        for partition_set in repository.get_external_partition_sets()
        if partition_set.name == partition_set_selector.get("partitionSetName")
    ]
    if not matches:
        return graphene_info.schema.type_named("PartitionSetNotFoundError")(
            partition_set_name)

    check.invariant(
        len(matches) == 1,
        "Partition set names must be unique: found {num} matches for {partition_set_name}"
        .format(num=len(matches), partition_set_name=partition_set_name),
    )

    external_partition_set = next(iter(matches))
    external_pipeline = repository.get_full_external_pipeline(
        external_partition_set.pipeline_name)
    pipeline_selector = PipelineSelector(
        location_name=location.name,
        repository_name=repository.name,
        pipeline_name=external_pipeline.name,
        solid_selection=external_partition_set.solid_selection,
    )

    partition_names = backfill_params.get("partitionNames")
    backfill_id = make_new_backfill_id()
    # Batch-fetch run config + tags for every requested partition from user code.
    result = graphene_info.context.get_external_partition_set_execution_param_data(
        repository.handle, partition_set_name, partition_names)

    if isinstance(result, ExternalPartitionExecutionErrorData):
        return graphene_info.schema.type_named("PythonError")(result.error)

    assert isinstance(result, ExternalPartitionSetExecutionParamData)

    launched_run_ids = []
    execution_param_list = _build_execution_param_list_for_backfill(
        graphene_info.context.instance,
        result.partition_data,
        backfill_id,
        backfill_params,
        pipeline_selector,
        external_partition_set,
    )

    # Synchronously create and launch one run per partition; collect ids
    # for the success payload.
    for execution_params in execution_param_list:
        pipeline_run = create_valid_pipeline_run(graphene_info, external_pipeline,
                                                 execution_params)
        graphene_info.context.instance.launch_run(pipeline_run.run_id,
                                                  external_pipeline)
        launched_run_ids.append(pipeline_run.run_id)

    return graphene_info.schema.type_named("PartitionBackfillSuccess")(
        backfill_id=backfill_id, launched_run_ids=launched_run_ids)
def execute_backfill_command(cli_args, print_fn, instance=None):
    """Run the `dagster pipeline backfill` CLI command (reconstructable-repo path).

    Resolves the pipeline and partition set from ``cli_args`` (prompting
    interactively unless ``noprompt``), confirms with the user, then creates and
    launches one run per selected partition, all tagged with a shared backfill id.

    Arguments:
        cli_args (dict): Parsed CLI options. ``pipeline_name`` is popped.
        print_fn (Callable[[str], None]): Output sink (normally click.echo).
        instance (Optional[DagsterInstance]): Defaults to ``DagsterInstance.get()``.
    """
    pipeline_name = cli_args.pop('pipeline_name')
    repo_args = {k: v for k, v in cli_args.items() if k in REPO_ARG_NAMES}
    # Click may deliver pipeline_name as a 1-tuple; unwrap it.
    if pipeline_name and not isinstance(pipeline_name, six.string_types):
        if len(pipeline_name) == 1:
            pipeline_name = pipeline_name[0]

    instance = instance or DagsterInstance.get()
    recon_repo = recon_repo_for_cli_args(repo_args)
    repo_def = recon_repo.get_definition()

    noprompt = cli_args.get('noprompt')

    # Resolve pipeline
    if not pipeline_name and noprompt:
        raise click.UsageError('No pipeline specified')
    if not pipeline_name:
        pipeline_name = click.prompt(
            'Select a pipeline to backfill: {}'.format(', '.join(repo_def.pipeline_names))
        )
    if not repo_def.has_pipeline(pipeline_name):
        raise click.UsageError('No pipeline found named `{}`'.format(pipeline_name))

    pipeline_def = repo_def.get_pipeline(pipeline_name)

    # Resolve partition set: explicit partition set defs plus those implied by
    # partition-based schedules.
    all_partition_sets = repo_def.partition_set_defs + [
        schedule_def.get_partition_set()
        for schedule_def in repo_def.schedule_defs
        if isinstance(schedule_def, PartitionScheduleDefinition)
    ]
    pipeline_partition_sets = [
        x for x in all_partition_sets if x.pipeline_name == pipeline_def.name
    ]
    if not pipeline_partition_sets:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(pipeline_def.name)
        )
    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        # Auto-select when unambiguous, otherwise prompt (or fail under --noprompt).
        if len(pipeline_partition_sets) == 1:
            partition_set_name = pipeline_partition_sets[0].name
        elif noprompt:
            raise click.UsageError('No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(
                    ', '.join(x.name for x in pipeline_partition_sets)
                )
            )
    partition_set = next((x for x in pipeline_partition_sets if x.name == partition_set_name), None)
    if not partition_set:
        raise click.UsageError('No partition set found named `{}`'.format(partition_set_name))

    # Resolve partitions to backfill
    partitions = gen_partitions_from_args(partition_set, cli_args)

    # Print backfill info
    print_fn('\n Pipeline: {}'.format(pipeline_def.name))
    print_fn('Partition set: {}'.format(partition_set.name))
    print_fn(' Partitions: {}\n'.format(print_partition_format(partitions, indent_level=15)))

    # This whole CLI tool should move to more of a "host process" model - but this is how we start
    repo_location = InProcessRepositoryLocation(recon_repo)
    # Fixed: a trailing comma previously wrapped this in a 1-tuple, so a tuple —
    # not an ExternalPipeline — was passed to instance.launch_run below.
    external_pipeline = repo_location.get_repository(
        repo_def.name).get_full_external_pipeline(pipeline_name)

    # Confirm and launch
    if noprompt or click.confirm(
        'Do you want to proceed with the backfill ({} partitions)?'.format(len(partitions))
    ):
        print_fn('Launching runs... ')
        backfill_id = make_new_backfill_id()
        run_tags = merge_dicts(
            PipelineRun.tags_for_backfill_id(backfill_id),
            get_tags_from_args(cli_args),
        )
        for partition in partitions:
            run = instance.create_run_for_pipeline(
                pipeline_def=pipeline_def,
                mode=partition_set.mode,
                solids_to_execute=frozenset(partition_set.solid_selection)
                if partition_set and partition_set.solid_selection
                else None,
                environment_dict=partition_set.environment_dict_for_partition(partition),
                tags=merge_dicts(partition_set.tags_for_partition(partition), run_tags),
            )

            instance.launch_run(run.run_id, external_pipeline)
            # Remove once we can handle synchronous execution... currently limited by sqlite
            time.sleep(0.1)

        print_fn('Launched backfill job `{}`'.format(backfill_id))
    else:
        print_fn(' Aborted!')
def execute_backfill_command(cli_args, print_fn, instance=None):
    """Run the `dagster pipeline backfill` CLI command (repository-location path).

    Resolves the repository, pipeline, and partition set from ``cli_args``
    (prompting interactively unless ``noprompt``), confirms with the user, then
    fetches run config and tags per partition and launches one run each.
    """
    instance = instance or DagsterInstance.get()
    repo_location = get_repository_location_from_kwargs(cli_args, instance)
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get('repository'))
    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        cli_args.get('pipeline'),
    )
    noprompt = cli_args.get('noprompt')

    # Map partition-set name -> external partition set, restricted to this pipeline.
    pipeline_partition_set_names = {
        external_partition_set.name: external_partition_set
        for external_partition_set in external_repo.get_external_partition_sets()
        if external_partition_set.pipeline_name == external_pipeline.name
    }
    if not pipeline_partition_set_names:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(
                external_pipeline.name))
    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        # Auto-select when unambiguous, otherwise prompt (or fail under --noprompt).
        if len(pipeline_partition_set_names) == 1:
            partition_set_name = next(iter(
                pipeline_partition_set_names.keys()))
        elif noprompt:
            raise click.UsageError(
                'No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(
                    ', '.join(x for x in pipeline_partition_set_names.keys())))
    partition_set = pipeline_partition_set_names.get(partition_set_name)
    if not partition_set:
        raise click.UsageError(
            'No partition set found named `{}`'.format(partition_set_name))

    mode = partition_set.mode
    solid_selection = partition_set.solid_selection

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location_handle=repo_location.location_handle,
    )

    # Resolve partitions to backfill; errors from user code surface as
    # ExternalPartitionExecutionErrorData rather than exceptions here.
    partition_names_or_error = repo_location.get_external_partition_names(
        repo_handle,
        partition_set_name,
    )
    if isinstance(partition_names_or_error, ExternalPartitionExecutionErrorData):
        raise DagsterBackfillFailedError(
            'Failure fetching partition names for {partition_set_name}: {error_message}'
            .format(
                partition_set_name=partition_set_name,
                error_message=partition_names_or_error.error.message,
            ),
            serialized_error_info=partition_names_or_error.error,
        )
    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args)

    # Print backfill info
    print_fn('\n Pipeline: {}'.format(external_pipeline.name))
    print_fn('Partition set: {}'.format(partition_set_name))
    print_fn(' Partitions: {}\n'.format(
        print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
            'Do you want to proceed with the backfill ({} partitions)?'.format(
                len(partition_names))):
        print_fn('Launching runs... ')
        backfill_id = make_new_backfill_id()
        run_tags = merge_dicts(
            PipelineRun.tags_for_backfill_id(backfill_id),
            get_tags_from_args(cli_args),
        )

        for partition_name in partition_names:
            # Fetch run config for this partition; surface any user-code error.
            run_config_or_error = repo_location.get_external_partition_config(
                repo_handle, partition_set_name, partition_name)
            if isinstance(run_config_or_error,
                          ExternalPartitionExecutionErrorData):
                raise DagsterBackfillFailedError(
                    'Failure fetching run config for partition {partition_name} in {partition_set_name}: {error_message}'
                    .format(
                        partition_name=partition_name,
                        partition_set_name=partition_set_name,
                        error_message=run_config_or_error.error.message,
                    ),
                    serialized_error_info=run_config_or_error.error,
                )

            # Fetch tags for this partition; surface any user-code error.
            tags_or_error = repo_location.get_external_partition_tags(
                repo_handle, partition_set_name, partition_name)
            if isinstance(tags_or_error, ExternalPartitionExecutionErrorData):
                raise DagsterBackfillFailedError(
                    'Failure fetching tags for partition {partition_name} in {partition_set_name}: {error_message}'
                    .format(
                        partition_name=partition_name,
                        partition_set_name=partition_set_name,
                        error_message=tags_or_error.error.message,
                    ),
                    serialized_error_info=tags_or_error.error,
                )

            # CLI/backfill run_tags win over partition-supplied tags (later merge arg).
            run = _create_external_pipeline_run(
                instance=instance,
                repo_location=repo_location,
                external_repo=external_repo,
                external_pipeline=external_pipeline,
                run_config=run_config_or_error.run_config,
                mode=mode,
                preset=None,
                tags=merge_dicts(tags_or_error.tags, run_tags),
                solid_selection=frozenset(solid_selection)
                if solid_selection else None,
            )

            instance.launch_run(run.run_id, external_pipeline)
            # Remove once we can handle synchronous execution... currently limited by sqlite
            time.sleep(0.1)

        print_fn('Launched backfill job `{}`'.format(backfill_id))
    else:
        print_fn('Aborted!')