def execute_backfill_iteration(instance, logger, debug_crash_flags=None):
    """Run one pass of the daemon backfill loop.

    Generator: yields ``None`` between units of work so the caller (the daemon
    scheduler) can interleave iterations, and yields a ``SerializableErrorInfo``
    when an error should be surfaced.

    Args:
        instance (DagsterInstance): the instance whose requested backfills are
            to be processed.
        logger: a stdlib-style logger used for progress/error reporting.
        debug_crash_flags (Optional[dict]): test-only hooks; passed to
            ``_check_for_debug_crash`` to simulate crashes at checkpoints.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    # Daemon backfills require the bulk-actions storage table; bail out with a
    # serializable error (rather than raising) if the schema is too old.
    if not instance.has_bulk_actions_table():
        message = (
            "A schema migration is required before daemon-based backfills can be supported. "
            "Try running `dagster instance migrate` to migrate your instance and try again."
        )
        logger.error(message)
        yield SerializableErrorInfo(
            message=message,
            stack=[],
            cls_name="",
        )
        return
    backfill_jobs = instance.get_backfills(status=BulkActionStatus.REQUESTED)
    if not backfill_jobs:
        # NOTE(review): later variants of this function log this at debug/info;
        # error level here looks unintentionally loud for a no-op pass — confirm.
        logger.error("No backfill jobs requested.")
        yield
        return
    for backfill_job in backfill_jobs:
        backfill_id = backfill_job.backfill_id
        # last_submitted_partition_name is the persisted checkpoint; its presence
        # means a previous iteration was interrupted mid-backfill.
        if not backfill_job.last_submitted_partition_name:
            logger.info(f"Starting backfill for {backfill_id}")
        else:
            logger.info(
                f"Resuming backfill for {backfill_id} from {backfill_job.last_submitted_partition_name}"
            )
        origin = (backfill_job.partition_set_origin.external_repository_origin.
                  repository_location_origin)
        try:
            # Spin up (and tear down, via the context manager) a repository
            # location handle for the duration of this backfill job.
            with origin.create_handle() as repo_location_handle:
                repo_location = repo_location_handle.create_location()
                has_more = True
                while has_more:
                    # Fetch the next chunk of partitions (up to CHECKPOINT_COUNT)
                    # plus the checkpoint name to persist after submission.
                    chunk, checkpoint, has_more = _get_partitions_chunk(
                        instance, logger, backfill_job, CHECKPOINT_COUNT)
                    _check_for_debug_crash(debug_crash_flags, "BEFORE_SUBMIT")
                    if chunk:
                        submit_backfill_runs(instance, repo_location, backfill_job, chunk)
                    _check_for_debug_crash(debug_crash_flags, "AFTER_SUBMIT")
                    if has_more:
                        # Persist progress before sleeping so a crash resumes
                        # from this checkpoint rather than resubmitting runs.
                        instance.update_backfill(
                            backfill_job.with_partition_checkpoint(checkpoint))
                        yield None
                        time.sleep(CHECKPOINT_INTERVAL)
                    else:
                        logger.info(
                            f"Backfill completed for {backfill_id} for {len(backfill_job.partition_names)} partitions"
                        )
                        instance.update_backfill(
                            backfill_job.with_status(BulkActionStatus.COMPLETED))
                        yield None
        except DagsterBackfillFailedError as e:
            # Mark the job FAILED and attach the serialized error (may be None
            # if the failure carried no error info).
            error_info = e.serializable_error_info
            instance.update_backfill(
                backfill_job.with_status(BulkActionStatus.FAILED).with_error(error_info))
            if error_info:
                logger.error(
                    f"Backfill failed for {backfill_id}: {error_info.to_string()}"
                )
            yield error_info
def create_and_launch_partition_backfill(graphene_info, backfill_params):
    """Create a ``PartitionBackfill`` from GraphQL mutation params and launch it.

    Normally the backfill is only *recorded* (``add_backfill``) for the daemon
    to execute asynchronously; when ``forceSynchronousSubmission`` is set
    (test-only), runs are submitted inline in chunks of ``BACKFILL_CHUNK_SIZE``.

    Args:
        graphene_info: GraphQL resolution context; supplies the workspace
            context and the ``DagsterInstance``.
        backfill_params (dict): GraphQL input — expects ``selector`` (with
            ``partitionSetName`` and ``repositorySelector``), ``partitionNames``,
            and optional ``fromFailure``, ``reexecutionSteps``, ``tags``,
            ``forceSynchronousSubmission``.

    Returns:
        GrapheneLaunchBackfillSuccess on success, or
        GraphenePartitionSetNotFoundError if the named partition set is absent.
    """
    from ...schema.backfill import GrapheneLaunchBackfillSuccess
    from ...schema.errors import GraphenePartitionSetNotFoundError

    partition_set_selector = backfill_params.get("selector")
    partition_set_name = partition_set_selector.get("partitionSetName")
    repository_selector = RepositorySelector.from_graphql_input(
        partition_set_selector.get("repositorySelector"))
    location = graphene_info.context.get_repository_location(
        repository_selector.location_name)
    repository = location.get_repository(repository_selector.repository_name)
    # Use the already-extracted partition_set_name instead of re-reading the
    # selector dict for every candidate.
    matches = [
        partition_set
        for partition_set in repository.get_external_partition_sets()
        if partition_set.name == partition_set_name
    ]
    if not matches:
        return GraphenePartitionSetNotFoundError(partition_set_name)
    check.invariant(
        len(matches) == 1,
        "Partition set names must be unique: found {num} matches for {partition_set_name}"
        .format(num=len(matches), partition_set_name=partition_set_name),
    )
    external_partition_set = matches[0]

    partition_names = backfill_params.get("partitionNames")
    backfill_id = make_new_backfill_id()
    backfill = PartitionBackfill(
        backfill_id=backfill_id,
        partition_set_origin=external_partition_set.get_external_origin(),
        status=BulkActionStatus.REQUESTED,
        partition_names=partition_names,
        from_failure=bool(backfill_params.get("fromFailure")),
        reexecution_steps=backfill_params.get("reexecutionSteps"),
        tags={t["key"]: t["value"] for t in backfill_params.get("tags", [])},
        backfill_timestamp=pendulum.now("UTC").timestamp(),
    )

    if backfill_params.get("forceSynchronousSubmission"):
        # should only be used in a test situation
        to_submit = list(partition_names)
        submitted_run_ids = []

        while to_submit:
            chunk = to_submit[:BACKFILL_CHUNK_SIZE]
            to_submit = to_submit[BACKFILL_CHUNK_SIZE:]
            submitted_run_ids.extend(
                run_id
                for run_id in submit_backfill_runs(
                    graphene_info.context.instance,
                    workspace=graphene_info.context,
                    repo_location=location,
                    backfill_job=backfill,
                    partition_names=chunk,
                )
                # submit_backfill_runs may yield None for runs it skipped;
                # compare with `is not None` per PEP 8 (was `!= None`).
                if run_id is not None
            )
        return GrapheneLaunchBackfillSuccess(
            backfill_id=backfill_id, launched_run_ids=submitted_run_ids)

    # Asynchronous path: persist the REQUESTED backfill for the daemon.
    graphene_info.context.instance.add_backfill(backfill)
    return GrapheneLaunchBackfillSuccess(backfill_id=backfill_id)
def execute_backfill_iteration(instance, workspace, logger, debug_crash_flags=None):
    """Run one pass of the daemon backfill loop against a shared workspace.

    Generator: yields between units of work so the daemon can interleave
    iterations; yields a ``SerializableErrorInfo`` when a backfill fails.
    Unlike the handle-creating variant, this resolves repository locations
    through an already-loaded ``IWorkspace``.

    Args:
        instance (DagsterInstance): instance whose REQUESTED backfills to process.
        workspace (IWorkspace): source of repository locations.
        logger: stdlib-style logger for progress/error reporting.
        debug_crash_flags (Optional[dict]): test-only crash-simulation hooks.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(workspace, "workspace", IWorkspace)
    backfill_jobs = instance.get_backfills(status=BulkActionStatus.REQUESTED)
    if not backfill_jobs:
        logger.debug("No backfill jobs requested.")
        yield
        return
    for backfill_job in backfill_jobs:
        backfill_id = backfill_job.backfill_id

        # refetch, in case the backfill was updated in the meantime
        backfill_job = instance.get_backfill(backfill_id)
        # last_submitted_partition_name is the persisted checkpoint from a
        # previous, interrupted iteration.
        if not backfill_job.last_submitted_partition_name:
            logger.info(f"Starting backfill for {backfill_id}")
        else:
            logger.info(
                f"Resuming backfill for {backfill_id} from {backfill_job.last_submitted_partition_name}"
            )
        origin = (backfill_job.partition_set_origin.external_repository_origin.
                  repository_location_origin)
        try:
            repo_location = workspace.get_location(origin.location_name)
            repo_name = backfill_job.partition_set_origin.external_repository_origin.repository_name
            partition_set_name = backfill_job.partition_set_origin.partition_set_name
            # Validate that the repository and partition set still exist in the
            # (possibly reloaded) workspace before submitting anything.
            if not repo_location.has_repository(repo_name):
                raise DagsterBackfillFailedError(
                    f"Could not find repository {repo_name} in location {repo_location.name} to "
                    f"run backfill {backfill_id}.")
            external_repo = repo_location.get_repository(repo_name)
            if not external_repo.has_external_partition_set(partition_set_name):
                raise DagsterBackfillFailedError(
                    f"Could not find partition set {partition_set_name} in repository {repo_name}. "
                )
            has_more = True
            while has_more:
                # Stop if the backfill was canceled/completed out from under us.
                if backfill_job.status != BulkActionStatus.REQUESTED:
                    break
                # Next chunk of partitions (up to CHECKPOINT_COUNT) plus the
                # checkpoint name to persist after submission.
                chunk, checkpoint, has_more = _get_partitions_chunk(
                    instance, logger, backfill_job, CHECKPOINT_COUNT)
                _check_for_debug_crash(debug_crash_flags, "BEFORE_SUBMIT")
                if chunk:
                    # submit_backfill_runs is itself a generator; yielding per
                    # submitted run keeps the daemon loop responsive.
                    for _run_id in submit_backfill_runs(
                            instance, workspace, repo_location, backfill_job, chunk):
                        yield
                    # before submitting, refetch the backfill job to check for status changes
                    backfill_job = instance.get_backfill(backfill_job.backfill_id)
                    if backfill_job.status != BulkActionStatus.REQUESTED:
                        return
                _check_for_debug_crash(debug_crash_flags, "AFTER_SUBMIT")
                if has_more:
                    # refetch, in case the backfill was updated in the meantime
                    backfill_job = instance.get_backfill(backfill_job.backfill_id)
                    # Persist progress before sleeping so a crash resumes from
                    # this checkpoint rather than resubmitting runs.
                    instance.update_backfill(
                        backfill_job.with_partition_checkpoint(checkpoint))
                    yield
                    time.sleep(CHECKPOINT_INTERVAL)
                else:
                    logger.info(
                        f"Backfill completed for {backfill_id} for {len(backfill_job.partition_names)} partitions"
                    )
                    instance.update_backfill(
                        backfill_job.with_status(BulkActionStatus.COMPLETED))
                    yield
        except Exception:
            # Any failure (not just DagsterBackfillFailedError) marks the job
            # FAILED with the serialized traceback attached.
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            instance.update_backfill(
                backfill_job.with_status(BulkActionStatus.FAILED).with_error(error_info))
            logger.error(
                f"Backfill failed for {backfill_id}: {error_info.to_string()}")
            yield error_info
def execute_backfill_iteration(instance, grpc_server_registry, logger, debug_crash_flags=None):
    """Run one pass of the daemon backfill loop using gRPC-managed locations.

    Generator: yields between units of work so the daemon can interleave
    iterations; yields a ``SerializableErrorInfo`` when a backfill fails.
    Repository locations are resolved through a ``RepositoryLocationManager``
    built from ``grpc_server_registry`` and cleaned up when the pass ends.

    Args:
        instance (DagsterInstance): instance whose REQUESTED backfills to process.
        grpc_server_registry: registry handed to ``RepositoryLocationManager``
            to create/cache repository locations.
        logger: stdlib-style logger for progress/error reporting.
        debug_crash_flags (Optional[dict]): test-only crash-simulation hooks.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    backfill_jobs = instance.get_backfills(status=BulkActionStatus.REQUESTED)
    if not backfill_jobs:
        logger.info("No backfill jobs requested.")
        yield
        return
    # The manager owns location lifetimes for the whole pass; all jobs share it.
    with RepositoryLocationManager(grpc_server_registry) as location_manager:
        for backfill_job in backfill_jobs:
            backfill_id = backfill_job.backfill_id
            # last_submitted_partition_name is the persisted checkpoint from a
            # previous, interrupted iteration.
            if not backfill_job.last_submitted_partition_name:
                logger.info(f"Starting backfill for {backfill_id}")
            else:
                logger.info(
                    f"Resuming backfill for {backfill_id} from {backfill_job.last_submitted_partition_name}"
                )
            origin = (backfill_job.partition_set_origin.
                      external_repository_origin.repository_location_origin)
            try:
                repo_location = location_manager.get_location(origin)
                has_more = True
                while has_more:
                    # refetch the backfill job
                    backfill_job = instance.get_backfill(backfill_job.backfill_id)
                    # Stop if the backfill was canceled/completed externally.
                    if backfill_job.status != BulkActionStatus.REQUESTED:
                        break
                    # Next chunk of partitions (up to CHECKPOINT_COUNT) plus the
                    # checkpoint name to persist after submission.
                    chunk, checkpoint, has_more = _get_partitions_chunk(
                        instance, logger, backfill_job, CHECKPOINT_COUNT)
                    _check_for_debug_crash(debug_crash_flags, "BEFORE_SUBMIT")
                    if chunk:
                        # submit_backfill_runs is a generator; yielding per run
                        # keeps the daemon loop responsive.
                        for _run_id in submit_backfill_runs(
                                instance, repo_location, backfill_job, chunk):
                            yield
                    _check_for_debug_crash(debug_crash_flags, "AFTER_SUBMIT")
                    if has_more:
                        # Persist progress before sleeping so a crash resumes
                        # from this checkpoint rather than resubmitting runs.
                        instance.update_backfill(
                            backfill_job.with_partition_checkpoint(checkpoint))
                        yield
                        time.sleep(CHECKPOINT_INTERVAL)
                    else:
                        logger.info(
                            f"Backfill completed for {backfill_id} for {len(backfill_job.partition_names)} partitions"
                        )
                        instance.update_backfill(
                            backfill_job.with_status(BulkActionStatus.COMPLETED))
                        yield
            except DagsterBackfillFailedError as e:
                # Mark the job FAILED and attach the serialized error (may be
                # None if the failure carried no error info).
                error_info = e.serializable_error_info
                instance.update_backfill(
                    backfill_job.with_status(BulkActionStatus.FAILED).with_error(error_info))
                if error_info:
                    logger.error(
                        f"Backfill failed for {backfill_id}: {error_info.to_string()}"
                    )
                yield error_info