def execute_run_command(input_json):
    """Execute the run described by ``input_json`` and echo its serialized events.

    ``input_json`` is deserialized as ``ExecuteRunArgs``. Events reported by
    ``_execute_run_command_body`` are buffered and only echoed once that call
    has returned. A non-zero return code is turned into ``sys.exit``.
    """
    with capture_interrupts():
        args = deserialize_as(input_json, ExecuteRunArgs)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)

        # Use the instance named by the args when a ref is provided, otherwise
        # fall back to DagsterInstance.get().
        if args.instance_ref:
            instance_cm = DagsterInstance.from_ref(args.instance_ref)
        else:
            instance_cm = DagsterInstance.get()

        with instance_cm as instance:
            serialized_events = []

            def _collect(event):
                serialized_events.append(serialize_dagster_namedtuple(event))

            exit_code = _execute_run_command_body(
                recon_pipeline,
                args.pipeline_run_id,
                instance,
                _collect,
                set_exit_code_on_failure=args.set_exit_code_on_failure or False,
            )

            for serialized_event in serialized_events:
                click.echo(serialized_event)

            if exit_code != 0:
                sys.exit(exit_code)
def launch_run(self, context: LaunchRunContext) -> None:
    """Start the run in ``context`` on the gRPC server that hosts its pipeline.

    Tags the run with the server's connection info (host, port or socket, and
    ssl flag) before asking the server to start it.

    Raises:
        DagsterInvariantViolationError: if the context has no workspace.
        DagsterLaunchFailedError: if the server reports the start as unsuccessful.
    """
    run = context.pipeline_run
    check.inst_param(run, "run", PipelineRun)

    workspace = context.workspace
    if not workspace:
        raise DagsterInvariantViolationError(
            "DefaultRunLauncher requires a workspace to be included in its LaunchRunContext"
        )

    pipeline_origin = check.not_none(run.external_pipeline_origin)
    location_origin = pipeline_origin.external_repository_origin.repository_location_origin
    location = workspace.get_location(location_origin.location_name)

    check.inst(
        location,
        GrpcServerRepositoryLocation,
        "DefaultRunLauncher: Can't launch runs for pipeline not loaded from a GRPC server",
    )

    # Build the connection info in pieces: host, then port-or-socket, then ssl.
    host_info = {"host": location.host}
    if location.port:
        endpoint_info = {"port": location.port}
    else:
        endpoint_info = {"socket": location.socket}
    ssl_info = {"use_ssl": True} if location.use_ssl else {}

    grpc_info = seven.json.dumps(merge_dicts(host_info, endpoint_info, ssl_info))
    self._instance.add_run_tags(run.run_id, {GRPC_INFO_TAG: grpc_info})

    serialized_result = location.client.start_run(
        ExecuteExternalPipelineArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=self._instance.get_ref(),
        )
    )
    res = deserialize_as(serialized_result, StartRunResult)

    if not res.success:
        raise DagsterLaunchFailedError(
            res.message, serializable_error_info=res.serializable_error_info
        )

    self._run_ids.add(run.run_id)

    if self._wait_for_processes:
        self._locations_to_wait_for.append(location)
def get_daemon_heartbeats(self) -> Dict[str, DaemonHeartbeat]:
    """Return the stored daemon heartbeats keyed by their ``daemon_type``."""
    with self.connect() as conn:
        result = conn.execute(db.select(DaemonHeartbeatsTable.columns))
        # Deserialize every row before keying, so a bad row fails before any
        # daemon_type lookup happens.
        deserialized = [deserialize_as(record.body, DaemonHeartbeat) for record in result]

        by_daemon_type = {}
        for heartbeat in deserialized:
            by_daemon_type[heartbeat.daemon_type] = heartbeat
        return by_daemon_type
def execute_step_command(input_json):
    """Execute the steps described by ``input_json`` and echo the serialized events.

    ``input_json`` is deserialized as ``ExecuteStepArgs``. When
    ``args.should_verify_step`` is set and ``verify_step`` reports failure, the
    command returns without executing anything. Events are buffered in full
    before any of them is echoed.
    """
    with capture_interrupts():
        args = deserialize_as(input_json, ExecuteStepArgs)

        if args.instance_ref:
            instance_cm = DagsterInstance.from_ref(args.instance_ref)
        else:
            instance_cm = DagsterInstance.get()

        with instance_cm as instance:
            pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
            not_found_message = "Pipeline run with id '{}' not found for step execution".format(
                args.pipeline_run_id
            )
            check.inst(pipeline_run, PipelineRun, not_found_message)

            if args.should_verify_step:
                verified = verify_step(
                    instance,
                    pipeline_run,
                    args.known_state.get_retry_state(),
                    args.step_keys_to_execute,
                )
                if not verified:
                    return

            full_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
            recon_pipeline = full_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            )

            execution_plan = create_execution_plan(
                recon_pipeline,
                run_config=pipeline_run.run_config,
                step_keys_to_execute=args.step_keys_to_execute,
                mode=pipeline_run.mode,
                known_state=args.known_state,
            )

            event_stream = execute_plan_iterator(
                execution_plan,
                recon_pipeline,
                pipeline_run,
                instance,
                run_config=pipeline_run.run_config,
                retry_mode=args.retry_mode,
            )
            # Drain the iterator completely before echoing anything.
            serialized_events = [serialize_dagster_namedtuple(event) for event in event_stream]

            for serialized_event in serialized_events:
                click.echo(serialized_event)
def add_selector_id_to_jobs_table(storage, print_fn=None):
    """
    Utility method that calculates the selector_id for each stored instigator state, and writes
    it to the jobs table.

    Args:
        storage: Object whose ``connect()`` yields a connection used for all reads and writes.
        print_fn: Optional callable invoked with progress messages ("Querying storage." at the
            start and "Complete." at the end).
    """
    if print_fn:
        print_fn("Querying storage.")

    with storage.connect() as conn:
        rows = conn.execute(
            db.select(
                [
                    JobTable.c.id,
                    JobTable.c.job_body,
                    JobTable.c.create_timestamp,
                    JobTable.c.update_timestamp,
                ]
            ).order_by(JobTable.c.id.asc())
        ).fetchall()

        for (row_id, state_str, create_timestamp, update_timestamp) in tqdm(rows):
            state = deserialize_as(state_str, InstigatorState)
            selector_id = state.selector_id

            # insert the state into a new instigator table, which has a unique constraint on
            # selector_id
            try:
                conn.execute(
                    InstigatorsTable.insert().values(  # pylint: disable=no-value-for-parameter
                        selector_id=selector_id,
                        repository_selector_id=state.repository_selector_id,
                        status=state.status.value,
                        instigator_type=state.instigator_type.value,
                        instigator_body=state_str,
                        create_timestamp=create_timestamp,
                        update_timestamp=update_timestamp,
                    )
                )
            except db.exc.IntegrityError:
                # A row for this selector_id already exists: overwrite it with this state
                # instead (create_timestamp of the existing row is left untouched).
                conn.execute(
                    InstigatorsTable.update()  # pylint: disable=no-value-for-parameter
                    .where(InstigatorsTable.c.selector_id == selector_id)
                    .values(
                        status=state.status.value,
                        repository_selector_id=state.repository_selector_id,
                        instigator_type=state.instigator_type.value,
                        instigator_body=state_str,
                        update_timestamp=update_timestamp,
                    )
                )

            # Only fill in selector_id on job rows that do not have one yet. `.is_(None)` is
            # the explicit spelling of the IS NULL test.
            conn.execute(
                JobTable.update()  # pylint: disable=no-value-for-parameter
                .where(JobTable.c.id == row_id)
                .where(JobTable.c.selector_id.is_(None))
                .values(selector_id=selector_id)
            )

    if print_fn:
        print_fn("Complete.")
def get_run_by_id(self, run_id: str) -> Optional[PipelineRun]:
    """Look up a single run by its id.

    Args:
        run_id (str): The id of the run

    Returns:
        Optional[PipelineRun]: The deserialized run, or None when no row matches.
    """
    check.str_param(run_id, "run_id")

    run_body_query = db.select([RunsTable.c.run_body]).where(RunsTable.c.run_id == run_id)
    matches = self.fetchall(run_body_query)
    if not len(matches):
        return None

    first_match = matches[0]
    return deserialize_as(first_match[0], PipelineRun)
def sync_get_external_execution_plan_grpc(
    api_client: "DagsterGrpcClient",
    pipeline_origin: ExternalPipelineOrigin,
    run_config: Mapping[str, Any],
    mode: str,
    pipeline_snapshot_id: str,
    solid_selection: Optional[List[str]] = None,
    step_keys_to_execute: Optional[List[str]] = None,
    known_state: Optional[KnownExecutionState] = None,
    instance: Optional[DagsterInstance] = None,
) -> ExecutionPlanSnapshot:
    """Request an execution plan snapshot from ``api_client`` and return it.

    The instance ref is only forwarded when an instance is given and its
    ``is_persistent`` attribute is truthy.

    Raises:
        DagsterUserCodeProcessError: if the reply deserializes to
            ``ExecutionPlanSnapshotErrorData``.
    """
    from dagster.grpc.client import DagsterGrpcClient

    check.inst_param(api_client, "api_client", DagsterGrpcClient)
    check.inst_param(pipeline_origin, "pipeline_origin", ExternalPipelineOrigin)
    solid_selection = check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    run_config = check.dict_param(run_config, "run_config", key_type=str)
    check.str_param(mode, "mode")
    check.opt_nullable_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)
    check.str_param(pipeline_snapshot_id, "pipeline_snapshot_id")
    check.opt_inst_param(known_state, "known_state", KnownExecutionState)
    check.opt_inst_param(instance, "instance", DagsterInstance)

    if instance and instance.is_persistent:
        instance_ref = instance.get_ref()
    else:
        instance_ref = None

    snapshot_args = ExecutionPlanSnapshotArgs(
        pipeline_origin=pipeline_origin,
        solid_selection=solid_selection,
        run_config=run_config,
        mode=mode,
        step_keys_to_execute=step_keys_to_execute,
        pipeline_snapshot_id=pipeline_snapshot_id,
        known_state=known_state,
        instance_ref=instance_ref,
    )
    serialized_reply = api_client.execution_plan_snapshot(
        execution_plan_snapshot_args=snapshot_args
    )
    result = deserialize_as(
        serialized_reply,
        (ExecutionPlanSnapshot, ExecutionPlanSnapshotErrorData),
    )

    if isinstance(result, ExecutionPlanSnapshotErrorData):
        raise DagsterUserCodeProcessError.from_error_info(result.error)

    return result
def get_backfills(self,
                  status: BulkActionStatus = None,
                  cursor: str = None,
                  limit: int = None) -> List[PartitionBackfill]:
    """Return stored backfills, ordered by descending table id.

    Args:
        status: When truthy, only backfills whose status column equals ``status.value``.
        cursor: When truthy, only backfills whose id is below the id of the row
            whose key equals ``cursor``.
        limit: When truthy, the maximum number of rows to fetch.
    """
    check.opt_inst_param(status, "status", BulkActionStatus)

    backfill_query = db.select([BulkActionsTable.c.body])

    if status:
        backfill_query = backfill_query.where(BulkActionsTable.c.status == status.value)

    if cursor:
        # Subquery resolving the cursor key to its table id.
        cursor_id_query = db.select([BulkActionsTable.c.id]).where(
            BulkActionsTable.c.key == cursor
        )
        backfill_query = backfill_query.where(BulkActionsTable.c.id < cursor_id_query)

    if limit:
        backfill_query = backfill_query.limit(limit)

    backfill_query = backfill_query.order_by(BulkActionsTable.c.id.desc())

    backfills = []
    for record in self.fetchall(backfill_query):
        backfills.append(deserialize_as(record[0], PartitionBackfill))
    return backfills
def get_run_records(
    self,
    filters: PipelineRunsFilter = None,
    limit: int = None,
    order_by: str = None,
    ascending: bool = False,
    cursor: str = None,
    bucket_by: Optional[Union[JobBucket, TagBucket]] = None,
) -> List[RunRecord]:
    """Query the runs table and wrap each matching row in a ``RunRecord``.

    ``start_time`` and ``end_time`` are only selected when
    ``has_run_stats_index_cols()`` is truthy; for rows without those columns
    the corresponding record fields are None.
    """
    filters = check.opt_inst_param(
        filters, "filters", PipelineRunsFilter, default=PipelineRunsFilter()
    )
    check.opt_int_param(limit, "limit")

    # only fetch columns we use to build RunRecord
    columns = ["id", "run_body", "create_timestamp", "update_timestamp"]
    if self.has_run_stats_index_cols():
        columns.extend(["start_time", "end_time"])

    query = self._runs_query(
        filters=filters,
        limit=limit,
        columns=columns,
        order_by=order_by,
        ascending=ascending,
        cursor=cursor,
        bucket_by=bucket_by,
    )

    records = []
    for row in self.fetchall(query):
        # Fields are validated in the same order as the RunRecord arguments.
        storage_id = check.int_param(row["id"], "id")
        pipeline_run = deserialize_as(check.str_param(row["run_body"], "run_body"), PipelineRun)
        create_timestamp = check.inst(row["create_timestamp"], datetime)
        update_timestamp = check.inst(row["update_timestamp"], datetime)

        if "start_time" in row:
            start_time = check.opt_inst(row["start_time"], float)
        else:
            start_time = None

        if "end_time" in row:
            end_time = check.opt_inst(row["end_time"], float)
        else:
            end_time = None

        records.append(
            RunRecord(
                storage_id=storage_id,
                pipeline_run=pipeline_run,
                create_timestamp=create_timestamp,
                update_timestamp=update_timestamp,
                start_time=start_time,
                end_time=end_time,
            )
        )
    return records
def execute_step_command(input_json):
    """Run the step command described by ``input_json`` and echo its serialized events.

    ``input_json`` is deserialized as ``ExecuteStepArgs``. The events yielded by
    ``_execute_step_command_body`` are all collected before any is echoed.
    """
    with capture_interrupts():
        args = deserialize_as(input_json, ExecuteStepArgs)

        if args.instance_ref:
            instance_cm = DagsterInstance.from_ref(args.instance_ref)
        else:
            instance_cm = DagsterInstance.get()

        with instance_cm as instance:
            pipeline_run = instance.get_run_by_id(args.pipeline_run_id)

            serialized_events = [
                serialize_dagster_namedtuple(event)
                for event in _execute_step_command_body(args, instance, pipeline_run)
            ]

            for serialized_event in serialized_events:
                click.echo(serialized_event)
def sync_get_external_partition_names_grpc(
    api_client: "DagsterGrpcClient",
    repository_handle: RepositoryHandle,
    partition_set_name: str,
) -> ExternalPartitionNamesData:
    """Request the partition names of ``partition_set_name`` from ``api_client``.

    Raises:
        DagsterUserCodeProcessError: if the reply deserializes to
            ``ExternalPartitionExecutionErrorData``.
    """
    from dagster.grpc.client import DagsterGrpcClient

    check.inst_param(api_client, "api_client", DagsterGrpcClient)
    check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
    check.str_param(partition_set_name, "partition_set_name")

    names_args = PartitionNamesArgs(
        repository_origin=repository_handle.get_external_origin(),
        partition_set_name=partition_set_name,
    )
    serialized_reply = api_client.external_partition_names(partition_names_args=names_args)
    result = deserialize_as(
        serialized_reply,
        (ExternalPartitionNamesData, ExternalPartitionExecutionErrorData),
    )

    if isinstance(result, ExternalPartitionExecutionErrorData):
        raise DagsterUserCodeProcessError.from_error_info(result.error)

    return result
def resume_run_command(input_json):
    """Resume the run described by ``input_json`` and echo its serialized events.

    ``input_json`` is deserialized as ``ResumeRunArgs``. Events reported by
    ``_resume_run_command_body`` are buffered and echoed after it returns.
    """
    with capture_interrupts():
        args = deserialize_as(input_json, ResumeRunArgs)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)

        if args.instance_ref:
            instance_cm = DagsterInstance.from_ref(args.instance_ref)
        else:
            instance_cm = DagsterInstance.get()

        with instance_cm as instance:
            serialized_events = []

            def _collect(event):
                serialized_events.append(serialize_dagster_namedtuple(event))

            _resume_run_command_body(
                recon_pipeline,
                args.pipeline_run_id,
                instance,
                _collect,
            )

            for serialized_event in serialized_events:
                click.echo(serialized_event)
def get_backfill(self, backfill_id: str) -> Optional[PartitionBackfill]:
    """Return the backfill whose key equals ``backfill_id``, or None if no row is found."""
    check.str_param(backfill_id, "backfill_id")

    body_query = db.select([BulkActionsTable.c.body]).where(
        BulkActionsTable.c.key == backfill_id
    )
    match = self.fetchone(body_query)
    if not match:
        return None
    return deserialize_as(match[0], PartitionBackfill)
def _row_to_run(self, row: Tuple) -> PipelineRun:
    """Deserialize the first element of ``row`` as a ``PipelineRun``."""
    serialized_run = row[0]
    return deserialize_as(serialized_run, PipelineRun)
def _reload_current_image(self) -> str:
    """Ask the client for the current image and return its ``current_image`` field."""
    serialized_reply = self.client.get_current_image()
    image_result = deserialize_as(serialized_reply, GetCurrentImageResult)
    return image_result.current_image
def _run_in_subprocess(
    serialized_execute_run_args,
    recon_pipeline,
    termination_event,
    subprocess_status_handler,
    run_event_handler,
):
    """Load a run from serialized args, execute it, and report progress via callbacks.

    Args:
        serialized_execute_run_args: Serialized ``ExecuteExternalPipelineArgs``.
        recon_pipeline: Passed through to ``core_execute_run``.
        termination_event: Passed to ``start_termination_thread``.
        subprocess_status_handler: Callable receiving lifecycle messages:
            ``IPCErrorMessage`` (setup failure), ``StartRunInSubprocessSuccessful``
            and ``RunInSubprocessComplete``.
        run_event_handler: Callable receiving the engine events reported here and
            every event yielded by ``core_execute_run``.

    Setup failures are not raised to the caller; they are reported through
    ``subprocess_status_handler`` and the function returns early.
    """
    start_termination_thread(termination_event)
    try:
        execute_run_args = deserialize_as(serialized_execute_run_args, ExecuteExternalPipelineArgs)

        # NOTE(review): when no instance_ref is given, nullcontext() binds `instance` to None,
        # so check.not_none below presumably fails and lands in the setup error path.
        with (DagsterInstance.from_ref(execute_run_args.instance_ref)
              if execute_run_args.instance_ref else nullcontext()) as instance:
            instance = check.not_none(instance)
            pipeline_run = instance.get_run_by_id(
                execute_run_args.pipeline_run_id)

            if not pipeline_run:
                raise DagsterRunNotFoundError(
                    "gRPC server could not load run {run_id} in order to execute it. Make sure that the gRPC server has access to your run storage."
                    .format(run_id=execute_run_args.pipeline_run_id),
                    invalid_run_id=execute_run_args.pipeline_run_id,
                )

            pid = os.getpid()

    except:
        # Deliberately bare: any failure during setup is converted into an
        # IPCErrorMessage followed by RunInSubprocessComplete instead of propagating.
        serializable_error_info = serializable_error_info_from_exc_info(
            sys.exc_info())
        event = IPCErrorMessage(
            serializable_error_info=serializable_error_info,
            message="Error during RPC setup for executing run: {message}".
            format(message=serializable_error_info.message),
        )
        subprocess_status_handler(event)
        subprocess_status_handler(RunInSubprocessComplete())
        return

    subprocess_status_handler(StartRunInSubprocessSuccessful())

    # NOTE(review): `instance` is used from here on although the `with` block above has
    # already exited - confirm the instance remains usable after its context exits.
    run_event_handler(
        instance.report_engine_event(
            "Started process for run (pid: {pid}).".format(pid=pid),
            pipeline_run,
            EngineEventData.in_process(pid, marker_end="cli_api_subprocess_init"),
        ))

    # This is so nasty but seemingly unavoidable
    # https://amir.rachum.com/blog/2017/03/03/generator-cleanup/
    # `closed` records that GeneratorExit was seen, so the `finally` block skips the
    # "exited" engine event in that case.
    closed = False
    try:
        for event in core_execute_run(recon_pipeline, pipeline_run, instance):
            run_event_handler(event)
    except GeneratorExit:
        closed = True
        raise
    except:
        # Relies on core_execute_run logging all exceptions to the event log before raising
        pass
    finally:
        if not closed:
            run_event_handler(
                instance.report_engine_event(
                    "Process for run exited (pid: {pid}).".format(pid=pid),
                    pipeline_run,
                ))
        # Always signal completion and dispose the instance, whatever the outcome above.
        subprocess_status_handler(RunInSubprocessComplete())
        instance.dispose()