class UpdateDagRunsRequestSchema(ApiStrictSchema):
    """Request payload syncing dag runs and task instances from a monitor."""

    dag_runs = fields.Nested(DagRunSchema, many=True)
    task_instances = fields.Nested(TaskInstanceSchema, many=True)

    last_seen_log_id = fields.Int(allow_none=True)
    airflow_export_meta = fields.Nested(AirflowExportMetaSchema, required=False)
    error_message = fields.Str(required=False, allow_none=True)
    syncer_type = fields.Str(allow_none=True)
class DatasetsReportSchema(ApiStrictSchema):
    """Payload schema for a datasets sync report from a monitored datasource.

    Deserializes into a DatasetsReport domain object via post_load.
    """

    sync_event_uid = fields.UUID()
    monitor_state = fields.Nested(DatasourceMonitorStateSchema)
    source_type = fields.String(allow_none=True)  # bigquery / snowflake / etc
    syncer_type = fields.String(allow_none=True)
    sync_event_timestamp = fields.DateTime(required=False, allow_none=True)
    datasets = fields.Nested(SyncedDatasetSchema, many=True)

    @post_load
    def make_object(self, data, **kwargs):
        # FIX: accept **kwargs for parity with the other post_load hooks in
        # this file — marshmallow 3 invokes hooks with extra keyword
        # arguments (many/partial), which previously raised TypeError here.
        return DatasetsReport(**data)
class SyncedTransactionsReportSchema(ApiStrictSchema):
    """Payload schema for a report of synced datasource transactions.

    Deserializes into a SyncedTransactionsReport domain object via post_load.
    """

    sync_event_uid = fields.UUID()
    sync_event_timestamp = fields.DateTime()
    source_type = fields.String(allow_none=True)  # bigquery / snowflake / etc
    syncer_type = fields.String(allow_none=True)
    monitor_state = fields.Nested(DatasourceMonitorStateSchema)
    transactions = fields.Nested(SyncedTransactionSchema, many=True)

    @post_load
    def make_object(self, data, **kwargs):
        # FIX: accept **kwargs for parity with the other post_load hooks in
        # this file — marshmallow 3 invokes hooks with extra keyword
        # arguments (many/partial), which previously raised TypeError here.
        return SyncedTransactionsReport(**data)
class TaskRunAttemptSchema(ApiStrictSchema):
    """Serialized state of a single task-run attempt."""

    task_run_uid = fields.UUID()
    task_run_attempt_uid = fields.UUID()

    state = EnumField(TaskRunState, allow_none=True)
    timestamp = fields.DateTime(allow_none=True)

    first_error = fields.Nested(ErrorInfoSchema, allow_none=True)
    latest_error = fields.Nested(ErrorInfoSchema, allow_none=True)

    attempt_number = fields.Number(allow_none=True)
    source = fields.String(allow_none=True)
    start_date = fields.DateTime(allow_none=True)
    end_date = fields.DateTime(allow_none=True)
    external_links_dict = fields.Dict(allow_none=True)
class AirflowServerInfoSchema(ApiStrictSchema):
    """Status and configuration record for a monitored Airflow server.

    Deserializes into an AirflowServerInfo domain object via post_load.
    """

    # Versions of the Airflow server and of the dbnd export/monitor components.
    airflow_version = fields.String(allow_none=True)
    airflow_export_version = fields.String(allow_none=True)
    airflow_monitor_version = fields.String(allow_none=True)

    # Sync progress / health of the monitor.
    last_sync_time = fields.DateTime(allow_none=True)
    monitor_error_message = fields.String(allow_none=True)
    synced_to = fields.DateTime(allow_none=True)
    api_mode = fields.String(allow_none=True)
    sync_interval = fields.Integer(allow_none=True)
    is_sync_enabled = fields.Boolean(allow_none=True)
    system_alert_definitions = fields.Dict()

    # Fetcher / connection configuration.
    fetcher = fields.String(allow_none=True)
    composer_client_id = fields.String(allow_none=True)
    dag_ids = fields.String(allow_none=True)
    base_url = fields.String()
    external_url = fields.String(allow_none=True)

    # Identity of the tracking source.
    source_instance_uid = fields.String(allow_none=True)
    tracking_source_uid = fields.String()
    airflow_instance_uid = fields.String(allow_none=True)  # TODO_API: deprecate

    name = fields.String(allow_none=True)
    env = fields.String(allow_none=True)
    monitor_status = fields.String(allow_none=True)
    monitor_config = fields.Nested(MonitorConfigSchema, allow_none=True)
    airflow_environment = fields.String(allow_none=True)

    # High-water marks of what was already synced.
    last_seen_dag_run_id = fields.Integer(allow_none=True)
    last_seen_log_id = fields.Integer(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        # Build the domain object from the validated payload.
        return AirflowServerInfo(**data)
class ScheduledJobSchemaV2(Schema):
    """Schema for scheduled jobs, mapped onto the DbndScheduledJob model.

    NOTE: inherits plain `Schema` (not ApiStrictSchema); strictness is set
    explicitly through Meta. Fields use `attribute="DbndScheduledJob.*"`
    to read from / write to the ORM row wrapped in a namespace.
    """

    class Meta:
        strict = True

    uid = fields.Str(attribute="DbndScheduledJob.uid", allow_none=True)
    name = fields.Str(attribute="DbndScheduledJob.name", required=True)
    cmd = fields.Str(attribute="DbndScheduledJob.cmd", required=True)
    schedule_interval = fields.Str(
        attribute="DbndScheduledJob.schedule_interval", required=True
    )
    start_date = fields.DateTime(
        allow_none=True, attribute="DbndScheduledJob.start_date", format="iso"
    )
    end_date = fields.DateTime(
        allow_none=True, attribute="DbndScheduledJob.end_date", format="iso"
    )
    readable_schedule_interval = fields.Str(
        attribute="DbndScheduledJob.readable_schedule_interval", allow_none=True
    )
    scheduled_interval_in_seconds = fields.Integer(
        attribute="DbndScheduledJob.scheduled_interval_in_seconds", allow_none=True
    )
    catchup = fields.Boolean(allow_none=True, attribute="DbndScheduledJob.catchup")
    depends_on_past = fields.Boolean(
        allow_none=True, attribute="DbndScheduledJob.depends_on_past"
    )
    retries = fields.Int(allow_none=True, attribute="DbndScheduledJob.retries")

    active = fields.Boolean(allow_none=True, attribute="DbndScheduledJob.active")

    create_user = fields.Str(allow_none=True, attribute="DbndScheduledJob.create_user")
    create_time = fields.DateTime(
        allow_none=True, attribute="DbndScheduledJob.create_time"
    )
    update_user = fields.Str(allow_none=True, attribute="DbndScheduledJob.update_user")
    update_time = fields.DateTime(
        allow_none=True, attribute="DbndScheduledJob.update_time"
    )
    from_file = fields.Boolean(allow_none=True, attribute="DbndScheduledJob.from_file")
    deleted_from_file = fields.Boolean(
        allow_none=True, attribute="DbndScheduledJob.deleted_from_file"
    )
    next_job_date = fields.DateTime(
        attribute="DbndScheduledJob.next_job_date", allow_none=True
    )
    alerts = fields.List(
        fields.Nested(AlertEventSchema),
        attribute="DbndScheduledJob.alerts",
        allow_none=True,
    )

    job_name = fields.Str(dump_only=True, attribute="DbndScheduledJob.job_name")

    # Dump-only fields: computed at serialization time, never loaded.
    last_run_uid = fields.UUID(dump_only=True)
    last_run_job = fields.Str(dump_only=True)
    last_job_date = fields.DateTime(dump_only=True)
    last_run_state = fields.Str(dump_only=True)
    is_airflow_synced = fields.Bool(dump_only=True)

    list_order = fields.Integer(attribute="DbndScheduledJob.list_order", allow_none=True)
    validation_errors = fields.Str(
        allow_none=True, attribute="DbndScheduledJob.validation_errors"
    )
class SyncedTransactionSchema(ApiStrictSchema):
    """A single datasource transaction with its write and read operations.

    Deserializes into a SyncedTransaction domain object via post_load.
    """

    datasource_transaction_id = fields.String()
    created_date = fields.DateTime()
    started_date = fields.DateTime()
    ended_date = fields.DateTime()
    write_operation = fields.Nested(SyncedTransactionOperationSchema)
    read_operations = fields.Nested(
        SyncedTransactionOperationSchema, many=True, allow_none=True
    )
    query_string = fields.String(allow_none=True)
    data_schema = fields.Nested(StructuredDataSchema, allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        # FIX: accept **kwargs like the other post_load hooks in this file
        # (marshmallow 3 passes many/partial keyword args to hooks).
        return SyncedTransaction(**data)
class InitRunArgsSchema(ApiObjectSchema):
    """Arguments needed to initialize a tracked run."""

    run_uid = fields.UUID()
    root_run_uid = fields.UUID()
    driver_task_uid = fields.UUID(allow_none=True)

    task_run_env = fields.Nested(TaskRunEnvInfoSchema)
    task_runs_info = fields.Nested(TaskRunsInfoSchema)

    new_run_info = fields.Nested(RunInfoSchema, allow_none=True)
    scheduled_run_info = fields.Nested(ScheduledRunInfoSchema, allow_none=True)
    update_existing = fields.Boolean()
    source = fields.String(allow_none=True)

    @post_load
    def make_init_run_args(self, data, **kwargs):
        # Materialize the validated payload as an InitRunArgs object.
        return InitRunArgs(**data)
class LogMetricsSchema(_ApiCallSchema):
    """API-call schema logging a batch of metrics.

    post_load converts each raw metric dict into a LogMetricArgs instance.
    """

    metrics_info = fields.Nested(LogMetricSchema, many=True)

    @post_load
    def make_object(self, data, **kwargs):
        data["metrics_info"] = [LogMetricArgs(**m) for m in data["metrics_info"]]
        # BUG FIX: the hook previously fell off the end and returned None,
        # which marshmallow then used as the deserialization result,
        # discarding the whole payload. Return the transformed data.
        return data
class RunInfoSchema(ApiStrictSchema):
    """Full description of a pipeline run.

    post_load returns an attribute-accessible dict (via _as_dotted_dict),
    not a model instance.
    """

    root_run_uid = fields.UUID()
    run_uid = fields.UUID()

    job_name = fields.String()
    project_name = fields.String(allow_none=True)
    user = fields.String()

    name = fields.String()
    description = fields.String(allow_none=True)

    state = EnumField(RunState)
    start_time = fields.DateTime()
    end_time = fields.DateTime(allow_none=True)  # deprecate

    dag_id = fields.String()
    cmd_name = fields.String(allow_none=True)

    execution_date = fields.DateTime()  # move to task

    target_date = fields.Date(allow_none=True)
    version = fields.String(allow_none=True)

    driver_name = fields.String()
    is_archived = fields.Boolean()
    env_name = fields.String(allow_none=True)
    cloud_type = fields.String()
    trigger = fields.String()
    task_executor = fields.String(allow_none=True)

    root_run = fields.Nested(RootRunInfoSchema)
    scheduled_run = fields.Nested(ScheduledRunInfoSchema, allow_none=True)

    sends_heartbeat = fields.Boolean(default=False, allow_none=True)

    scheduled_job_name = fields.String(allow_none=True)
    scheduled_date = fields.DateTime(allow_none=True)
    external_links = fields.Dict(allow_none=True)

    @post_load
    def make_run_info(self, data, **kwargs):
        # Expose the payload as a dotted dict so callers can use attr access.
        return _as_dotted_dict(**data)
class TaskRunsInfoSchema(ApiObjectSchema):
    """Bundle of task runs and related metadata belonging to one run."""

    run_uid = fields.UUID()
    root_run_uid = fields.UUID()
    task_run_env_uid = fields.UUID()

    parent_child_map = fields.List(fields.List(fields.UUID()))
    task_runs = fields.Nested(
        TaskRunInfoSchema, many=True, exclude=("task_signature_source",)
    )
    upstreams_map = fields.List(fields.List(fields.UUID()))
    dynamic_task_run_update = fields.Boolean()

    targets = fields.Nested(TargetInfoSchema, many=True)
    task_definitions = fields.Nested(TaskDefinitionInfoSchema, many=True)
    af_context = fields.Nested(AirflowTaskContextSchema, allow_none=True)

    @post_load
    def make_run_info(self, data, **kwargs):
        # Return an attribute-accessible dict of the validated payload.
        return _as_dotted_dict(**data)
class SyncedDatasetMetadataSchema(ApiStrictSchema):
    """Metadata (size, record count, structure) of a synced dataset.

    Deserializes into a SyncedDatasetMetadata domain object via post_load.
    """

    num_bytes = fields.Integer()
    records = fields.Integer()
    schema = fields.Nested(StructuredDataSchema)
    # TODO: ADD -> Preview

    @post_load
    def make_object(self, data, **kwargs):
        # FIX: accept **kwargs like the other post_load hooks in this file
        # (marshmallow 3 passes many/partial keyword args to hooks).
        return SyncedDatasetMetadata(**data)
class SyncedDatasetSchema(ApiStrictSchema):
    """A synced dataset identified by URI, plus its metadata.

    Deserializes into a SyncedDataset domain object via post_load.
    """

    uri = fields.String()  # {storage_type://region/project_id/scheme_name/table_name}
    created_date = fields.DateTime()
    last_modified_date = fields.DateTime(allow_none=True)
    metadata = fields.Nested(SyncedDatasetMetadataSchema)
    uid = fields.UUID(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        # FIX: accept **kwargs like the other post_load hooks in this file
        # (marshmallow 3 passes many/partial keyword args to hooks).
        return SyncedDataset(**data)
class TaskRunAttemptUpdateArgsSchema(ApiObjectSchema):
    """State-update payload for a single task-run attempt."""

    task_run_uid = fields.UUID()
    task_run_attempt_uid = fields.UUID()

    state = EnumField(TaskRunState, allow_none=True)
    timestamp = fields.DateTime(allow_none=True)
    error = fields.Nested(ErrorInfoSchema, allow_none=True)
    attempt_number = fields.Number(allow_none=True)
    source = fields.String(allow_none=True)
    start_date = fields.DateTime(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        # Materialize the validated payload as a TaskRunAttemptUpdateArgs.
        return TaskRunAttemptUpdateArgs(**data)
class TaskRunInfoSchema(ApiObjectSchema):
    """Full description of a task run as reported by the tracker."""

    task_run_uid = fields.UUID()
    task_run_attempt_uid = fields.UUID()
    task_definition_uid = fields.UUID()
    run_uid = fields.UUID()

    task_id = fields.Str()
    task_signature = fields.Str()
    task_signature_source = fields.Str()
    task_af_id = fields.Str()
    execution_date = fields.DateTime()

    name = fields.Str()
    env = fields.Str()
    command_line = fields.Str()
    functional_call = fields.Str()

    has_downstreams = fields.Bool()
    has_upstreams = fields.Bool()
    is_reused = fields.Bool()
    is_dynamic = fields.Bool()
    is_system = fields.Bool()
    is_skipped = fields.Bool()
    is_root = fields.Bool()

    output_signature = fields.Str()
    state = EnumField(TaskRunState)
    target_date = fields.Date(allow_none=True)

    log_local = fields.Str(allow_none=True)
    log_remote = fields.Str(allow_none=True)
    version = fields.Str()

    task_run_params = fields.Nested(TaskRunParamSchema, many=True)
    external_links = fields.Dict(allow_none=True)

    @post_load
    def make_task_run(self, data, **kwargs):
        # Materialize the validated payload as a TaskRunInfo object.
        return TaskRunInfo(**data)
class TaskDefinitionInfoSchema(ApiObjectSchema):
    """Static (code-level) definition of a task."""

    task_definition_uid = fields.UUID()
    name = fields.Str()
    class_version = fields.Str()
    family = fields.Str()

    module_source = fields.Str(allow_none=True)
    module_source_hash = fields.Str(allow_none=True)
    source = fields.Str(allow_none=True)
    source_hash = fields.Str(allow_none=True)
    type = fields.Str()

    task_param_definitions = fields.Nested(TaskDefinitionParamSchema, many=True)

    @post_load
    def make_task_definition(self, data, **kwargs):
        # Materialize the validated payload as a TaskDefinitionInfo object.
        return TaskDefinitionInfo(**data)
class DagSchema(ApiStrictSchema):
    """Serialized Airflow DAG structure and metadata."""

    description = fields.Str()
    root_task_ids = fields.List(fields.Str())
    tasks = fields.Nested(TaskSchema, many=True)

    owner = fields.Str()
    dag_id = fields.Str()
    schedule_interval = fields.Str()
    catchup = fields.Bool(allow_none=True)
    start_date = fields.DateTime(allow_none=True)
    end_date = fields.DateTime(allow_none=True)

    is_committed = fields.Bool()
    git_commit = fields.Str()
    dag_folder = fields.Str()
    hostname = fields.Str()
    source_code = fields.Str(allow_none=True)
    module_source_hash = fields.Str(allow_none=True)
    is_subdag = fields.Bool()

    tags = fields.List(fields.Str(), allow_none=True)
    task_type = fields.Str()
    task_args = fields.Dict()

    is_active = fields.Bool(allow_none=True)
    is_paused = fields.Bool(allow_none=True)
class UpdateTaskRunAttemptsSchema(_ApiCallSchema):
    """API call carrying a batch of task-run-attempt state updates."""

    task_run_attempt_updates = fields.Nested(
        TaskRunAttemptUpdateArgsSchema, many=True
    )
class AddTaskRunsSchema(_ApiCallSchema):
    """API call adding task runs to an existing run.

    NOTE(review): this class is re-declared later in the file WITHOUT the
    `source` field; the later definition shadows this one at import time —
    confirm which declaration is intended and remove the other.
    """

    task_runs_info = fields.Nested(TaskRunsInfoSchema)
    source = EnumField(UpdateSource, allow_none=True)
class InitRunSchema(_ApiCallSchema):
    """API call that initializes a run from InitRunArgs."""

    init_args = fields.Nested(InitRunArgsSchema)
class LogMetricSchema(_ApiCallSchema):
    """API call logging a single metric for a task-run attempt."""

    task_run_attempt_uid = fields.UUID(required=True)
    target_meta_uid = fields.UUID(allow_none=True)
    metric = fields.Nested(MetricSchema, allow_none=True)
    source = fields.Str(allow_none=True)
class AddTaskRunsSchema(_ApiCallSchema):
    """API call adding task runs to an existing run.

    NOTE(review): duplicate declaration — an earlier AddTaskRunsSchema in
    this file also defines a `source` field. This later definition wins the
    module-level name binding, silently dropping `source`; confirm the
    duplication is intentional.
    """

    task_runs_info = fields.Nested(TaskRunsInfoSchema)
class AirflowTaskInfosSchema(_ApiCallSchema):
    """API call reporting Airflow task infos from a monitored server."""

    airflow_task_infos = fields.Nested(AirflowTaskInfoSchema, many=True)
    source = fields.Str()
    base_url = fields.Str()
class LogDataframeHistogramListSchema(_ApiCallSchema):
    """API call logging dataframe histogram information."""

    # NOTE(review): the plural field name suggests a list, but the nested
    # field is declared without many=True, so a single object is expected —
    # confirm against the callers before changing.
    histograms_info = fields.Nested(LogDataframeHistogramSchema)
class LogTargetsSchema(_ApiCallSchema):
    """API call logging a batch of target operations."""

    targets_info = fields.Nested(LogTargetSchema, many=True)
class AirflowExportMetaSchema(ApiStrictSchema):
    """Metadata about an export request served by the Airflow plugin."""

    airflow_version = fields.Str()
    plugin_version = fields.Str()
    request_args = fields.Dict()
    metrics = fields.Nested(MetricsSchema)
class ScheduledJobArgsSchema(_ApiCallSchema):
    """API call creating or updating a scheduled job."""

    scheduled_job_args = fields.Nested(ScheduledJobInfoSchema)
    update_existing = fields.Bool(default=False)
class AlertDefsSchema(ApiStrictSchema):
    """Serialization schema for alert definitions."""

    severity = fields.Str(required=True)
    type = fields.Str(required=True)
    user_metric = fields.Str()
    operator = fields.Str()
    is_str_value = fields.Bool()

    created_at = fields.DateTime()
    scheduled_job_name = fields.Str(attribute="scheduled_job.name")
    source_instance_name = fields.Method("get_tracking_source_name")
    env = fields.Method("get_tracking_source_env")
    # TODO_CORE: API: Deprecate airflow_server_info
    airflow_instance_name = fields.Method("get_tracking_source_name")
    project_id = fields.Int(attribute="job.project_id")
    project_name = fields.Str(attribute="job.project.name")
    alert_on_historical_runs = fields.Bool()
    group_uid = fields.Str(allow_none=True, load_from="alert_group_uid")
    root_group_uid = fields.Str(allow_none=True, load_from="alert_root_group_uid")

    uid = fields.Str(allow_none=True)
    value = fields.Str(allow_none=True)
    job_id = fields.Int(allow_none=True)
    summary = fields.Str(allow_none=True)
    job_name = fields.Str(attribute="job.name", allow_none=True)
    task_repr = fields.Str(allow_none=True)
    task_name = fields.Str(allow_none=True)
    custom_name = fields.Str(allow_none=True)
    original_uid = fields.Str(allow_none=True)
    advanced_json = fields.Str(allow_none=True)
    scheduled_job_uid = fields.Str(allow_none=True)
    custom_description = fields.Str(allow_none=True)
    ml_alert = fields.Nested(MLAlert, allow_none=True)

    # Fields for DatasetSlaAlert/DatasetSlaAdvancedAlert alert
    # --------------------------------------
    seconds_delta = fields.Int(allow_none=True)  # Converts to datetime.timedelta
    dataset_partial_name = fields.Str(allow_none=True)
    datasets_uids = fields.List(fields.Str(), allow_none=True)

    # Fields for OperationColumnStatAdvancedAlert alert
    # --------------------------------------
    dataset_uid = fields.Str(allow_none=True)
    # Operation type (e.g. "read", "write", None=any) to filter stats by
    operation_type = fields.Str(allow_none=True)
    # Type of MetricRule, found in dbnd_web. Used to build advanced_json
    metrics_rules = fields.List(fields.Dict(), allow_none=True)

    # Used only by the UI
    affected_datasets = fields.List(fields.Dict(), allow_none=True, dump_only=True)

    assigned_jobs = fields.Method(serialize="get_assigned_jobs", dump_only=True)
    is_system = fields.Function(
        lambda alert_def: alert_def.owner == "system", dump_only=True
    )

    def get_tracking_source_name(self, obj):
        # Serializer for source_instance_name / airflow_instance_name.
        return self._get_tracking_source_instance(obj).name

    def get_tracking_source_env(self, obj):
        # Serializer for the `env` Method field.
        return self._get_tracking_source_instance(obj).env

    def _get_tracking_source_instance(self, obj):
        # Prefer the tracking source attached to the alert's job; fall back
        # to the alert definition's own tracking source.
        if obj.job:
            return obj.job.tracking_source

        return obj.tracking_source

    @pre_load
    def prepere(self, data: dict, **kwargs):
        # NOTE(review): name is a typo for "prepare"; only invoked via the
        # @pre_load decorator, so left as-is to avoid churn.
        # Coerce `value` to a string so the Str field accepts numeric input.
        value = data.get("value", None)
        if value is not None:
            data["value"] = str(data["value"])
        return data

    def get_assigned_jobs(self, alert_def):
        # Collect (job_id, job_name) pairs from this alert and its
        # sub-alert definitions, dropping (None, None) placeholders.
        self_job = (alert_def.job_id, alert_def.job_name)
        sub_alerts_jobs = (
            (sub_alert.job_id, sub_alert.job_name)
            for sub_alert in alert_def.sub_alert_definitions
        )
        alert_jobs = chain(sub_alerts_jobs, [self_job])
        alert_jobs = set(filter(lambda l: l != (None, None), alert_jobs))
        serialized_assigned_jobs = [
            {"job_id": job_id, "job_name": job_name}
            for job_id, job_name in alert_jobs
        ]
        return serialized_assigned_jobs