def host_dagit_ui_with_workspace_process_context(
    workspace_process_context: WorkspaceProcessContext,
    host: Optional[str],
    port: int,
    path_prefix: str,
):
    """Serve the dagit app built from a workspace process context via uvicorn.

    Args:
        workspace_process_context: Context used to build the app; its
            ``instance`` attribute is also passed to telemetry.
        host: Host name handed to ``uvicorn.run`` (may be ``None``).
        port: Port handed to ``uvicorn.run``.
        path_prefix: Prefix passed to the app factory and shown in the
            startup log line.
    """
    check.inst_param(
        workspace_process_context, "workspace_process_context", WorkspaceProcessContext
    )
    check.opt_str_param(host, "host")
    check.int_param(port, "port")
    check.str_param(path_prefix, "path_prefix")

    configure_loggers()
    dagit_logger = logging.getLogger("dagit")

    dagit_app = create_app_from_workspace_process_context(
        workspace_process_context, path_prefix
    )

    startup_message = "Serving dagit on http://{host}:{port}{path_prefix} in process {pid}".format(
        host=host,
        port=port,
        path_prefix=path_prefix,
        pid=os.getpid(),
    )
    dagit_logger.info(startup_message)

    # Record the webserver start in telemetry before handing control to uvicorn.
    log_action(workspace_process_context.instance, START_DAGIT_WEBSERVER)

    uvicorn_options = dict(
        host=host,
        port=port,
        access_log=False,
        log_level="warning",
    )
    with uploading_logging_thread():
        uvicorn.run(dagit_app, **uvicorn_options)
def start_server(host, port, app, port_lookup, port_lookup_attempts=0):
    """Start the dagit gevent WSGI server, optionally retrying on another port.

    Args:
        host: Host name to bind.
        port: Port to bind.
        app: WSGI application to serve.
        port_lookup: When truthy, an "address already in use" failure may be
            retried on a different port.
        port_lookup_attempts: Number of retries already made. The user is
            only prompted for confirmation on the first failure (when this
            is 0); later failures retry without prompting.

    Raises:
        Exception: The port is in use and no retry is performed; chained to
            the original ``OSError``.
        OSError: Any other ``OSError`` from the server is re-raised unchanged.
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import errno

    server = pywsgi.WSGIServer((host, port), app, handler_class=WebSocketHandler)

    print('Serving on http://{host}:{port} in process {pid}'.format(
        host=host, port=port, pid=os.getpid()))

    log_action(START_DAGIT_WEBSERVER)
    with uploading_logging_thread():
        try:
            server.serve_forever()
        except OSError as os_error:
            # Check the errno first: the message text varies by platform and
            # locale. The substring match is kept as a fallback so every case
            # that was recognised before is still recognised.
            address_in_use = (
                os_error.errno == errno.EADDRINUSE
                or 'Address already in use' in str(os_error)
            )
            if not address_in_use:
                # Bare raise keeps the original traceback intact.
                raise

            if port_lookup and (port_lookup_attempts > 0 or click.confirm(
                ('Another process on your machine is already listening on port {port}. '
                 'Would you like to run the app at another port instead?'
                 ).format(port=port))):
                port_lookup_attempts += 1
                # The offset added to the port grows with every attempt.
                start_server(host, port + port_lookup_attempts, app, True,
                             port_lookup_attempts)
            else:
                six.raise_from(
                    Exception((
                        'Another process on your machine is already listening on port {port}. '
                        'It is possible that you have another instance of dagit '
                        'running somewhere using the same port. Or it could be another '
                        'random process. Either kill that process or use the -p option to '
                        'select another port.').format(port=port)),
                    os_error,
                )
def _create_scheduler_run(
    instance,
    schedule_time,
    repo_location,
    external_schedule,
    external_pipeline,
    run_request,
):
    """Create a NOT_STARTED run on ``instance`` for one schedule run request.

    Builds the external execution plan, merges the pipeline tags with the
    run request's tags, stamps the scheduled execution time (converted to
    UTC) and the optional run key onto the tags, logs a
    ``SCHEDULED_RUN_CREATED`` telemetry action, and finally calls
    ``instance.create_run``.

    Returns:
        The value returned by ``instance.create_run``.
    """
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    run_config = run_request.run_config
    mode = external_schedule.mode

    plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_config,
        mode,
        step_keys_to_execute=None,
        known_state=None,
    )
    plan_snapshot = plan.execution_plan_snapshot

    # Pipeline tags may be empty/None; validate them before merging.
    base_tags = external_pipeline.tags or {}
    check_tags(base_tags, "pipeline_tags")
    tags = merge_dicts(base_tags, run_request.tags)

    scheduled_for = to_timezone(schedule_time, "UTC").isoformat()
    tags[SCHEDULED_EXECUTION_TIME_TAG] = scheduled_for

    run_key = run_request.run_key
    if run_key:
        tags[RUN_KEY_TAG] = run_key

    # Only hashed names are placed in the telemetry payload.
    telemetry_metadata = {
        "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
        "SCHEDULE_NAME_HASH": hash_name(external_schedule.name),
        "repo_hash": hash_name(repo_location.name),
        "pipeline_name_hash": hash_name(external_pipeline.name),
    }
    log_action(instance, SCHEDULED_RUN_CREATED, metadata=telemetry_metadata)

    return instance.create_run(
        pipeline_name=external_schedule.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=external_schedule.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_pipeline.solid_selection,
        status=PipelineRunStatus.NOT_STARTED,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
def _check_add_heartbeat(
    self, instance, daemon_uuid, heartbeat_interval_seconds, error_interval_seconds
):
    """Prune old errors, then write a heartbeat if the interval has elapsed.

    Steps, in order:
      1. Drop entries from the tail of ``self._errors`` whose timestamp is
         older than ``error_interval_seconds`` ago.
      2. Return early if the previous heartbeat was sent less than
         ``heartbeat_interval_seconds`` ago.
      3. Log an error if the stored heartbeat for this daemon type carries a
         different daemon id than ``daemon_uuid``.
      4. Store a new heartbeat containing the remaining errors.
      5. Emit a ``DAEMON_ALIVE`` telemetry action at most once per
         ``TELEMETRY_LOGGING_INTERVAL`` seconds.
    """
    cutoff = pendulum.now("UTC").subtract(seconds=error_interval_seconds)

    # NOTE(review): entries are (error, timestamp) pairs; the oldest ones
    # appear to live at the tail of the sequence -- confirm against the code
    # that appends to self._errors.
    while len(self._errors) > 0 and self._errors[-1][1] < cutoff:
        self._errors.pop()

    now = pendulum.now("UTC")

    if self._last_heartbeat_time:
        seconds_since_heartbeat = (now - self._last_heartbeat_time).total_seconds()
        if seconds_since_heartbeat < heartbeat_interval_seconds:
            return

    daemon_type = self.daemon_type()
    stored_heartbeat = instance.get_daemon_heartbeats().get(daemon_type)

    # Only complain after this daemon has already sent at least one heartbeat.
    if (
        self._last_heartbeat_time
        and stored_heartbeat
        and stored_heartbeat.daemon_id != daemon_uuid
    ):
        message = (
            "Another {} daemon is still sending heartbeats. You likely have multiple "
            "daemon processes running at once, which is not supported. "
            "Last heartbeat daemon id: {}, "
            "Current daemon_id: {}"
        ).format(daemon_type, stored_heartbeat.daemon_id, daemon_uuid)
        self._logger.error(message)

    self._last_heartbeat_time = now

    recent_errors = [error for error, _timestamp in self._errors]
    heartbeat = DaemonHeartbeat(
        now.float_timestamp,
        daemon_type,
        daemon_uuid,
        errors=recent_errors,
    )
    instance.add_daemon_heartbeat(heartbeat)

    telemetry_due = (
        not self._last_log_time
        or (now - self._last_log_time).total_seconds() >= TELEMETRY_LOGGING_INTERVAL
    )
    if telemetry_due:
        log_action(
            instance,
            DAEMON_ALIVE,
            metadata={"DAEMON_SESSION_ID": get_telemetry_daemon_session_id()},
        )
        self._last_log_time = now
def _create_sensor_run(
    instance, repo_location, external_sensor, external_pipeline, run_request, target_data
):
    """Create a NOT_STARTED run on ``instance`` for one sensor run request.

    Builds the external execution plan for the target mode, layers the
    pipeline tags, the run request's tags and the sensor tags (later layers
    are passed to ``merge_dicts`` after earlier ones), adds the optional run
    key, logs a ``SENSOR_RUN_CREATED`` telemetry action, and calls
    ``instance.create_run``.

    Returns:
        The value returned by ``instance.create_run``.
    """
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    run_config = run_request.run_config

    plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_config,
        target_data.mode,
        step_keys_to_execute=None,
        known_state=None,
        instance=instance,
    )
    plan_snapshot = plan.execution_plan_snapshot

    # Pipeline tags may be empty/None; validate them before merging.
    base_tags = external_pipeline.tags or {}
    check_tags(base_tags, "pipeline_tags")

    request_tags = merge_dicts(base_tags, run_request.tags)
    tags = merge_dicts(request_tags, PipelineRun.tags_for_sensor(external_sensor))

    run_key = run_request.run_key
    if run_key:
        tags[RUN_KEY_TAG] = run_key

    # Only hashed names are placed in the telemetry payload.
    telemetry_metadata = {
        "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
        "SENSOR_NAME_HASH": hash_name(external_sensor.name),
        "pipeline_name_hash": hash_name(external_pipeline.name),
        "repo_hash": hash_name(repo_location.name),
    }
    log_action(instance, SENSOR_RUN_CREATED, metadata=telemetry_metadata)

    return instance.create_run(
        pipeline_name=target_data.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=target_data.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        solid_selection=target_data.solid_selection,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
def log_dagit_telemetry_event(graphene_info, action, client_time, metadata):
    """Forward a telemetry event reported by the dagit client to ``log_action``.

    Args:
        graphene_info: Resolver info; ``context.instance`` supplies the instance.
        action: Telemetry action name, passed through unchanged.
        client_time: Client timestamp in milliseconds since the epoch (any
            value accepted by ``int()``); converted to a naive UTC datetime.
        metadata: JSON-encoded string holding the event metadata.

    Returns:
        A ``GrapheneLogTelemetrySuccess`` carrying ``action``.
    """
    from ..schema.roots.mutation import GrapheneLogTelemetrySuccess

    dagster_instance = graphene_info.context.instance
    parsed_metadata = json.loads(metadata)

    # The client reports milliseconds; utcfromtimestamp expects seconds.
    epoch_seconds = int(client_time) / 1000
    event_time = datetime.utcfromtimestamp(epoch_seconds)

    log_action(
        instance=dagster_instance,
        action=action,
        client_time=event_time,
        elapsed_time=None,
        metadata=parsed_metadata,
    )
    return GrapheneLogTelemetrySuccess(action=action)
def _check_add_heartbeat(self, instance, daemon_uuid,
                         heartbeat_interval_seconds, error_interval_seconds):
    """Prune old errors, then write a heartbeat if the interval has elapsed.

    Entries at the tail of ``self._errors`` older than
    ``error_interval_seconds`` are discarded. If the previous heartbeat was
    sent less than ``heartbeat_interval_seconds`` ago nothing else happens.
    Otherwise a warning is logged when the stored heartbeat for this daemon
    type belongs to a different daemon id, a new heartbeat with the remaining
    errors is stored, and a ``DAEMON_ALIVE`` telemetry action is emitted at
    most once per ``TELEMETRY_LOGGING_INTERVAL`` seconds.
    """
    cutoff = pendulum.now("UTC").subtract(seconds=error_interval_seconds)

    # NOTE(review): entries are (error, timestamp) pairs; the oldest ones
    # appear to live at the tail of the sequence -- confirm against the code
    # that appends to self._errors.
    while len(self._errors) > 0 and self._errors[-1][1] < cutoff:
        self._errors.pop()

    now = pendulum.now("UTC")

    if self._last_heartbeat_time:
        seconds_since_heartbeat = (now - self._last_heartbeat_time).total_seconds()
        if seconds_since_heartbeat < heartbeat_interval_seconds:
            return

    daemon_type = self.daemon_type()
    stored_heartbeat = instance.get_daemon_heartbeats().get(daemon_type)

    # Only warn after this daemon has already sent at least one heartbeat.
    if (self._last_heartbeat_time and stored_heartbeat
            and stored_heartbeat.daemon_id != daemon_uuid):
        message = (
            "Taking over from another {} daemon process. If this "
            "message reoccurs, you may have multiple daemons running which is not supported. "
            "Last heartbeat daemon id: {}, "
            "Current daemon_id: {}"
        ).format(daemon_type, stored_heartbeat.daemon_id, daemon_uuid)
        self._logger.warning(message)

    self._last_heartbeat_time = now

    recent_errors = [error for error, _timestamp in self._errors]
    heartbeat = DaemonHeartbeat(
        now.float_timestamp,
        daemon_type,
        daemon_uuid,
        errors=recent_errors,
    )
    instance.add_daemon_heartbeat(heartbeat)

    telemetry_due = (
        not self._last_log_time
        or (now - self._last_log_time).total_seconds() >= TELEMETRY_LOGGING_INTERVAL
    )
    if telemetry_due:
        log_action(instance, DAEMON_ALIVE)
        self._last_log_time = now
def start_server(instance, host, port, path_prefix, app, port_lookup, port_lookup_attempts=0):
    """Start the dagit gevent WSGI server, optionally retrying on another port.

    Args:
        instance: Passed to ``log_action`` for the webserver-start telemetry.
        host: Host name to bind.
        port: Port to bind.
        path_prefix: Shown in the startup message and forwarded on retry.
        app: WSGI application to serve.
        port_lookup: When truthy, an "address already in use" failure may be
            retried on a different port.
        port_lookup_attempts: Number of retries already made. The user is
            only prompted for confirmation on the first failure (when this
            is 0); later failures retry without prompting.

    Raises:
        Exception: The port is in use and no retry is performed; chained to
            the original ``OSError``.
        OSError: Any other ``OSError`` from the server is re-raised unchanged.
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import errno

    server = pywsgi.WSGIServer((host, port), app, handler_class=WebSocketHandler)

    print(  # pylint: disable=print-call
        "Serving on http://{host}:{port}{path_prefix} in process {pid}".format(
            host=host, port=port, path_prefix=path_prefix, pid=os.getpid()
        )
    )

    log_action(instance, START_DAGIT_WEBSERVER)
    with uploading_logging_thread():
        try:
            server.serve_forever()
        except OSError as os_error:
            # Check the errno first: the message text varies by platform and
            # locale. The substring match is kept as a fallback so every case
            # that was recognised before is still recognised.
            address_in_use = (
                os_error.errno == errno.EADDRINUSE
                or "Address already in use" in str(os_error)
            )
            if not address_in_use:
                # Bare raise keeps the original traceback intact.
                raise

            if port_lookup and (
                port_lookup_attempts > 0
                or click.confirm(
                    (
                        "Another process on your machine is already listening on port {port}. "
                        "Would you like to run the app at another port instead?"
                    ).format(port=port)
                )
            ):
                port_lookup_attempts += 1
                # The offset added to the port grows with every attempt.
                start_server(
                    instance,
                    host,
                    port + port_lookup_attempts,
                    path_prefix,
                    app,
                    True,
                    port_lookup_attempts,
                )
            else:
                six.raise_from(
                    Exception(
                        (
                            "Another process on your machine is already listening on port {port}. "
                            "It is possible that you have another instance of dagit "
                            "running somewhere using the same port. Or it could be another "
                            "random process. Either kill that process or use the -p option to "
                            "select another port."
                        ).format(port=port)
                    ),
                    os_error,
                )
def create_backfill_run(instance, repo_location, external_pipeline,
                        external_partition_set, backfill_job, partition_data):
    """Create the run for a single partition of a backfill job.

    A ``BACKFILL_RUN_CREATED`` telemetry action is logged up front, before
    the branch that decides which kind of run (if any) is created.

    Three cases are handled:
      * ``backfill_job.from_failure``: re-execute the partition's last run
        from failure; returns ``None`` when there is no last run or it did
        not fail.
      * ``backfill_job.reexecution_steps``: re-execute the given steps,
        linking to the partition's last run (when one exists) through
        parent/root run ids and tags.
      * otherwise: a fresh run of the partition.

    Returns:
        The value returned by ``instance.create_run`` or
        ``instance.create_reexecuted_run_from_failure``, or ``None``.
    """
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.inst_param(external_partition_set, "external_partition_set",
                     ExternalPartitionSet)
    check.inst_param(backfill_job, "backfill_job", PartitionBackfill)
    check.inst_param(partition_data, "partition_data",
                     ExternalPartitionExecutionParamData)

    # Only hashed names are placed in the telemetry payload.
    telemetry_metadata = {
        "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
        "repo_hash": hash_name(repo_location.name),
        "pipeline_name_hash": hash_name(external_pipeline.name),
    }
    log_action(instance, BACKFILL_RUN_CREATED, metadata=telemetry_metadata)

    tags = merge_dicts(
        external_pipeline.tags,
        partition_data.tags,
        PipelineRun.tags_for_backfill_id(backfill_job.backfill_id),
        backfill_job.tags,
    )

    # Re-execution from failure is delegated entirely to the instance.
    if backfill_job.from_failure:
        failed_run = _fetch_last_run(instance, external_partition_set,
                                     partition_data.name)
        if not failed_run or failed_run.status != PipelineRunStatus.FAILURE:
            return None
        return instance.create_reexecuted_run_from_failure(
            failed_run,
            repo_location,
            external_pipeline,
            tags=tags,
            run_config=partition_data.run_config,
            mode=external_partition_set.mode,
        )

    # Defaults describe a fresh run; the step re-execution case overrides them.
    step_keys_to_execute = None
    parent_run_id = None
    root_run_id = None
    known_state = None

    if backfill_job.reexecution_steps:
        previous_run = _fetch_last_run(instance, external_partition_set,
                                       partition_data.name)
        if previous_run:
            parent_run_id = previous_run.run_id
            root_run_id = previous_run.root_run_id or previous_run.run_id

        if parent_run_id and root_run_id:
            lineage_tags = {
                PARENT_RUN_ID_TAG: parent_run_id,
                ROOT_RUN_ID_TAG: root_run_id,
            }
            tags = merge_dicts(tags, lineage_tags)

        step_keys_to_execute = backfill_job.reexecution_steps

        # Known state is only derived from a previous run that succeeded.
        if previous_run and previous_run.status == PipelineRunStatus.SUCCESS:
            known_state = KnownExecutionState.for_reexecution(
                instance.all_logs(parent_run_id),
                step_keys_to_execute,
            )

    solids_to_execute = None
    solid_selection = None
    if external_partition_set.solid_selection:
        solids_to_execute = frozenset(external_partition_set.solid_selection)
        solid_selection = external_partition_set.solid_selection

    plan = repo_location.get_external_execution_plan(
        external_pipeline,
        partition_data.run_config,
        external_partition_set.mode,
        step_keys_to_execute=step_keys_to_execute,
        known_state=known_state,
        instance=instance,
    )

    return instance.create_run(
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=plan.execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        pipeline_name=external_pipeline.name,
        run_id=make_new_run_id(),
        solids_to_execute=solids_to_execute,
        run_config=partition_data.run_config,
        mode=external_partition_set.mode,
        step_keys_to_execute=step_keys_to_execute,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        status=PipelineRunStatus.NOT_STARTED,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
        solid_selection=solid_selection,
    )