class AirflowServerInfoSchema(ApiStrictSchema):
    """Payload describing an Airflow server and its monitor/sync state."""

    airflow_version = fields.Str(allow_none=True)
    airflow_export_version = fields.Str(allow_none=True)
    airflow_monitor_version = fields.Str(allow_none=True)
    last_sync_time = fields.DateTime(allow_none=True)
    monitor_error_message = fields.Str(allow_none=True)
    synced_to = fields.DateTime(allow_none=True)
    api_mode = fields.Str(allow_none=True)
    sync_interval = fields.Int(allow_none=True)
    is_sync_enabled = fields.Bool(allow_none=True)
    system_alert_definitions = fields.Dict()
    fetcher = fields.Str(allow_none=True)
    composer_client_id = fields.Str(allow_none=True)
    dag_ids = fields.Str(allow_none=True)
    base_url = fields.Str()
    external_url = fields.Str(allow_none=True)
    source_instance_uid = fields.Str(allow_none=True)
    tracking_source_uid = fields.Str()
    airflow_instance_uid = fields.Str(allow_none=True)  # TODO_API: deprecate
    name = fields.Str(allow_none=True)
    env = fields.Str(allow_none=True)
    monitor_status = fields.Str(allow_none=True)
    monitor_config = fields.Nested(MonitorConfigSchema, allow_none=True)
    airflow_environment = fields.Str(allow_none=True)
    last_seen_dag_run_id = fields.Int(allow_none=True)
    last_seen_log_id = fields.Int(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        # Turn the validated payload into the domain object.
        return AirflowServerInfo(**data)
class MonitorConfigSchema(ApiStrictSchema):
    """Optional tuning knobs for the Airflow monitor; every field may be omitted."""

    include_sources = fields.Bool(required=False)
    dag_run_bulk_size = fields.Int(required=False)
    start_time_window = fields.Int(required=False)
    log_bytes_from_head = fields.Int(required=False)
    log_bytes_from_end = fields.Int(required=False)
    use_async_tracking = fields.Bool(required=False)
class ScheduledJobSchemaV2(Schema):
    """Schema (v2) for scheduled jobs; fields map onto ``DbndScheduledJob`` attributes."""

    class Meta:
        strict = True

    uid = fields.String(attribute="DbndScheduledJob.uid", allow_none=True)
    name = fields.String(attribute="DbndScheduledJob.name", required=True)
    cmd = fields.String(attribute="DbndScheduledJob.cmd", required=True)
    schedule_interval = fields.String(
        attribute="DbndScheduledJob.schedule_interval", required=True
    )
    start_date = fields.DateTime(
        allow_none=True, attribute="DbndScheduledJob.start_date", format="iso"
    )
    end_date = fields.DateTime(
        allow_none=True, attribute="DbndScheduledJob.end_date", format="iso"
    )
    readable_schedule_interval = fields.String(
        attribute="DbndScheduledJob.readable_schedule_interval", allow_none=True
    )
    scheduled_interval_in_seconds = fields.Integer(
        attribute="DbndScheduledJob.scheduled_interval_in_seconds", allow_none=True
    )
    catchup = fields.Boolean(allow_none=True, attribute="DbndScheduledJob.catchup")
    depends_on_past = fields.Boolean(
        allow_none=True, attribute="DbndScheduledJob.depends_on_past"
    )
    retries = fields.Integer(allow_none=True, attribute="DbndScheduledJob.retries")
    active = fields.Boolean(allow_none=True, attribute="DbndScheduledJob.active")
    create_user = fields.String(
        allow_none=True, attribute="DbndScheduledJob.create_user"
    )
    create_time = fields.DateTime(
        allow_none=True, attribute="DbndScheduledJob.create_time"
    )
    update_user = fields.String(
        allow_none=True, attribute="DbndScheduledJob.update_user"
    )
    update_time = fields.DateTime(
        allow_none=True, attribute="DbndScheduledJob.update_time"
    )
    from_file = fields.Boolean(allow_none=True, attribute="DbndScheduledJob.from_file")
    deleted_from_file = fields.Boolean(
        allow_none=True, attribute="DbndScheduledJob.deleted_from_file"
    )
    next_job_date = fields.DateTime(
        attribute="DbndScheduledJob.next_job_date", allow_none=True
    )
    alerts = fields.List(
        fields.Nested(AlertEventSchema),
        attribute="DbndScheduledJob.alerts",
        allow_none=True,
    )

    # Derived / read-only fields: serialized out, never loaded back in.
    job_name = fields.String(dump_only=True, attribute="DbndScheduledJob.job_name")
    last_run_uid = fields.UUID(dump_only=True)
    last_run_job = fields.String(dump_only=True)
    last_job_date = fields.DateTime(dump_only=True)
    last_run_state = fields.String(dump_only=True)
    is_airflow_synced = fields.Boolean(dump_only=True)
    list_order = fields.Integer(
        attribute="DbndScheduledJob.list_order", allow_none=True
    )
    validation_errors = fields.String(
        allow_none=True, attribute="DbndScheduledJob.validation_errors"
    )
class StructuredDataSchema(ApiStrictSchema):
    """Schema of a structured dataset: column names/types, shape and byte size."""

    type = fields.String(allow_none=True)
    columns_names = fields.List(
        fields.String(), load_from="columns", dump_to="columns", allow_none=True
    )
    columns_types = fields.Dict(load_from="dtypes", dump_to="dtypes", allow_none=True)
    # Shape: Tuple of ints -> (records, columns). fields.Tuple() was only added in
    # marshmallow 3.0.0, so a List of Integers is used instead.
    shape = fields.List(fields.Integer(allow_none=True), allow_none=True)
    byte_size = fields.Integer(
        load_from="size.bytes", dump_to="size.bytes", allow_none=True
    )

    @pre_load
    def _backfill_columns_types(self, data: dict) -> dict:
        """Support Snowflake & PostgreSQL data_schemas before dbnd-sdk-0.61.0.

        Older SDKs sent the legacy "column_types" key; copy it to the current
        "columns_types" key before loading.

        NOTE: renamed from ``pre_load`` — the old method name shadowed the
        marshmallow ``pre_load`` decorator inside the class body. The hook is
        registered via the decorator's tag, not by name, so this is safe.
        """
        columns_types = data.get("column_types")
        if columns_types:
            data["columns_types"] = columns_types
        return data

    @post_load
    def make_object(self, data) -> DataSchemaArgs:
        return DataSchemaArgs(**data)
class AirflowServerInfoSchema(_ApiCallSchema):
    """API-call payload carrying Airflow server details and monitor configuration."""

    base_url = fields.Str()
    external_url = fields.Str(allow_none=True)
    airflow_instance_uid = fields.Str(allow_none=True)
    airflow_version = fields.Str(allow_none=True)
    airflow_export_version = fields.Str(allow_none=True)
    airflow_monitor_version = fields.Str(allow_none=True)
    dags_path = fields.Str(allow_none=True)
    logs_path = fields.Str(allow_none=True)
    last_sync_time = fields.DateTime(allow_none=True)
    monitor_status = fields.Str(allow_none=True)
    monitor_error_message = fields.Str(allow_none=True)
    monitor_start_time = fields.DateTime(allow_none=True)
    synced_from = fields.DateTime(allow_none=True)
    synced_to = fields.DateTime(allow_none=True)
    api_mode = fields.Str(allow_none=True)
    sync_interval = fields.Int(allow_none=True)
    is_sync_enabled = fields.Bool(allow_none=True)
    fetcher = fields.Str(allow_none=True)
    composer_client_id = fields.Str(allow_none=True)
    active_dags = fields.Dict(allow_none=True)
    name = fields.Str(allow_none=True)
    env = fields.Str(allow_none=True)
    include_logs = fields.Bool(allow_none=True)
    include_task_args = fields.Bool(allow_none=True)
    fetch_quantity = fields.Int(allow_none=True)
    oldest_incomplete_data_in_days = fields.Int(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        # Materialize the domain object from the validated payload.
        return AirflowServerInfo(**data)
class SyncedDatasetMetadataSchema(ApiStrictSchema):
    """Metadata for a synced dataset: size, record count and structure."""

    num_bytes = fields.Int()
    records = fields.Int()
    schema = fields.Nested(StructuredDataSchema)
    # TODO: ADD -> Preview

    @post_load
    def make_object(self, data):
        return SyncedDatasetMetadata(**data)
class UpdateDagRunsRequestSchema(ApiStrictSchema):
    """Request body for bulk-updating DAG runs and their task instances."""

    dag_runs = fields.Nested(DagRunSchema, many=True)
    task_instances = fields.Nested(TaskInstanceSchema, many=True)
    last_seen_log_id = fields.Int(allow_none=True)
    airflow_export_meta = fields.Nested(AirflowExportMetaSchema, required=False)
    error_message = fields.Str(required=False, allow_none=True)
    syncer_type = fields.Str(allow_none=True)
class AirflowTaskContextSchema(ApiObjectSchema):
    """Identifies a single Airflow task attempt (dag/task/execution date/try)."""

    dag_id = fields.Str()
    execution_date = fields.Str()
    task_id = fields.Str()
    try_number = fields.Int(allow_none=True)

    @post_load
    def make_run_info(self, data, **kwargs):
        return AirflowTaskContext(**data)
class LogTargetMetricsSchema(_ApiCallSchema):
    """API-call payload logging metrics about a target produced by a task run."""

    task_run_uid = fields.UUID(required=True)
    task_run_attempt_uid = fields.UUID(required=True)
    target_path = fields.Str()
    value_preview = fields.Str(allow_none=True)
    data_dimensions = fields.List(fields.Int(), allow_none=True)
    data_schema = fields.Str(allow_none=True)
class DagRunSchema(ApiStrictSchema):
    """A single Airflow DAG run as reported by the exporter."""

    dag_id = fields.Str(allow_none=True)
    run_id = fields.Str(required=False)
    dagrun_id = fields.Int()
    start_date = fields.DateTime(allow_none=True)
    state = fields.Str()
    end_date = fields.DateTime(allow_none=True)
    execution_date = fields.DateTime(allow_none=True)
    task_args = fields.Dict()
class TaskInstanceSchema(ApiStrictSchema):
    """A single Airflow task instance, including its log body and xcom values."""

    execution_date = fields.DateTime()
    dag_id = fields.Str()
    state = fields.Str(allow_none=True)
    try_number = fields.Int()
    task_id = fields.Str()
    start_date = fields.DateTime(allow_none=True)
    end_date = fields.DateTime(allow_none=True)
    log_body = fields.Str(allow_none=True)
    xcom_dict = fields.Dict()
class AirflowTaskInfoSchema(_ApiCallSchema):
    """API-call payload identifying one Airflow task attempt and its sync time."""

    execution_date = fields.DateTime()
    last_sync = fields.DateTime(allow_none=True)
    dag_id = fields.Str()
    task_id = fields.Str()
    task_run_attempt_uid = fields.UUID()
    retry_number = fields.Int(required=False, allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        return AirflowTaskInfo(**data)
class SyncedTransactionOperationSchema(ApiStrictSchema):
    """One operation (read/write) within a synced dataset transaction."""

    op_type = fields.Str()
    started_date = fields.DateTime()  # type: datetime
    records_count = fields.Int()
    dataset_uri = fields.Str()
    dataset_uid = fields.UUID(allow_none=True)

    @post_load
    def make_object(self, data):
        return SyncedTransactionOperation(**data)
class MetricSchema(ApiObjectSchema):
    """A tracked metric; the value is mirrored into typed columns when possible."""

    key = fields.Str()
    value = fields.Str(allow_none=True)
    value_int = fields.Int(allow_none=True)
    value_float = fields.Float(allow_none=True)
    timestamp = fields.DateTime()

    @post_load
    def make_object(self, data, **kwargs):
        # Imported lazily to avoid a module-level import cycle.
        from dbnd._core.tracking.metrics import Metric

        return Metric(**data)
class MetricSchema(ApiStrictSchema):
    """A tracked metric with raw, string, JSON and numeric value variants."""

    key = fields.Str()
    value = fields.Raw(allow_none=True)
    value_str = fields.Str(allow_none=True)
    value_json = fields.Raw(allow_none=True)
    value_int = fields.Int(allow_none=True)
    value_float = fields.Float(allow_none=True)
    timestamp = fields.DateTime()

    @post_load
    def make_object(self, data, **kwargs):
        return Metric(**data)
class TaskSchema(ApiStrictSchema):
    """An Airflow task definition: graph edges, source code hashes and arguments."""

    upstream_task_ids = fields.List(fields.Str())
    downstream_task_ids = fields.List(fields.Str())
    task_type = fields.Str()
    task_source_code = fields.Str(allow_none=True)
    task_source_hash = fields.Str(allow_none=True)
    task_module_code = fields.Str(allow_none=True)
    module_source_hash = fields.Str(allow_none=True)
    dag_id = fields.Str()
    task_id = fields.Str()
    retries = fields.Int()
    command = fields.Str(allow_none=True)
    task_args = fields.Dict()
class ScheduledJobInfoSchema(ApiObjectSchema):
    """Full description of a scheduled job, loaded into ``ScheduledJobInfo``."""

    uid = fields.UUID()
    name = fields.Str()
    cmd = fields.Str()
    start_date = fields.DateTime()
    create_user = fields.Str()
    create_time = fields.DateTime()
    end_date = fields.DateTime(allow_none=True)
    schedule_interval = fields.Str(allow_none=True)
    catchup = fields.Bool(allow_none=True)
    depends_on_past = fields.Bool(allow_none=True)
    retries = fields.Int(allow_none=True)
    active = fields.Bool(allow_none=True)
    update_user = fields.Str(allow_none=True)
    update_time = fields.DateTime(allow_none=True)
    from_file = fields.Bool(allow_none=True)
    deleted_from_file = fields.Bool(allow_none=True)
    list_order = fields.Int(allow_none=True)
    job_name = fields.Str(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        return ScheduledJobInfo(**data)
class LogTargetSchema(_ApiCallSchema):
    """API-call payload describing a target operation performed by a task run."""

    run_uid = fields.UUID(required=True)
    task_run_uid = fields.UUID(required=True)
    task_run_name = fields.Str()
    task_run_attempt_uid = fields.UUID(required=True)
    target_path = fields.Str()
    param_name = fields.Str(allow_none=True)
    task_def_uid = fields.UUID(allow_none=True)
    operation_type = EnumField(DbndTargetOperationType)
    operation_status = EnumField(DbndTargetOperationStatus)
    value_preview = fields.Str(allow_none=True)
    data_dimensions = fields.List(fields.Int(), allow_none=True)
    data_schema = fields.Str(allow_none=True)
    data_hash = fields.Str(allow_none=True)
class JobFromFileSchema(Schema):
    """Schema for scheduled-job entries declared in a file."""

    class Meta:
        strict = True

    name = fields.String(required=True)
    cmd = fields.String(required=True)
    schedule_interval = fields.String(required=True)
    start_date = fields.DateTime(allow_none=False, required=True, format="iso")
    owner = fields.String(allow_none=False)
    end_date = fields.DateTime(allow_none=True, format="iso")
    depends_on_past = fields.Boolean(allow_none=True)
    catchup = fields.Boolean(allow_none=True)
    retries = fields.Integer(allow_none=True)
    list_order = fields.Integer(allow_none=True)
    active = fields.Boolean(allow_none=True)
class AirflowServerInfoSchema(_ApiCallSchema):
    """API-call payload with Airflow server versions, paths and monitor status."""

    base_url = fields.Str()
    airflow_version = fields.Str(allow_none=True)
    airflow_export_version = fields.Str(allow_none=True)
    airflow_monitor_version = fields.Str(allow_none=True)
    dags_path = fields.Str(allow_none=True)
    logs_path = fields.Str(allow_none=True)
    last_sync_time = fields.DateTime(allow_none=True)
    monitor_status = fields.Str(allow_none=True)
    monitor_error_message = fields.Str(allow_none=True)
    monitor_start_time = fields.DateTime(allow_none=True)
    synced_from = fields.DateTime(allow_none=True)
    synced_to = fields.DateTime(allow_none=True)
    rbac_enabled = fields.Bool(allow_none=True)
    sync_interval = fields.Int(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        return AirflowServerInfo(**data)
class ColumnStatsSchema(ApiStrictSchema):
    """Per-column statistics of a dataset, loaded into ``ColumnStatsArgs``."""

    column_name = fields.Str(required=True)
    column_type = fields.Str(required=True)
    records_count = fields.Int(required=True)
    distinct_count = fields.Int(required=False, allow_none=True)
    null_count = fields.Int(required=False, allow_none=True)

    # Metric for non-numeric column type
    unique_count = fields.Int(required=False, allow_none=True)
    # Most frequent value - load_from attrs to support backward compatibility
    # with SDK < 0.59.0
    most_freq_value = fields.Raw(required=False, allow_none=True, load_from="top_value")
    most_freq_value_count = fields.Int(
        required=False, allow_none=True, load_from="top_freq_count"
    )

    # Metric for numeric column type
    mean_value = fields.Float(required=False, allow_none=True)
    min_value = fields.Float(required=False, allow_none=True)
    max_value = fields.Float(required=False, allow_none=True)
    std_value = fields.Float(required=False, allow_none=True)
    # Percentiles
    quartile_1 = fields.Float(required=False, allow_none=True)
    quartile_2 = fields.Float(required=False, allow_none=True)
    quartile_3 = fields.Float(required=False, allow_none=True)

    # Hybrid properties - We only dump them so we don't load them back to
    # ColumnStatsArgs when we make_object. We don't save them in DB, and they
    # are only computed in ColumnStatsArgs.
    non_null_count = fields.Int(required=False, allow_none=True, dump_only=True)
    null_percent = fields.Float(required=False, allow_none=True, dump_only=True)

    @post_load
    def make_object(self, data: dict) -> ColumnStatsArgs:
        return ColumnStatsArgs(**data)

    @post_dump
    def dump_object(self, data: dict) -> dict:
        # Strip keys whose value is None so the dumped payload stays compact.
        return {key: value for key, value in data.items() if value is not None}
class UpdateLastSeenValuesRequestSchema(ApiStrictSchema):
    """Request body advancing the last-seen DAG run and log id watermarks."""

    last_seen_dag_run_id = fields.Int()
    last_seen_log_id = fields.Int()
class JobsSetArchiveSchema(ApiStrictSchema):
    """Request body to (un)archive a non-empty list of jobs by id."""

    ids = fields.List(
        fields.Int(), required=True, validate=validate.Length(min=1)
    )
    is_archived = fields.Bool(required=True)
class GetRunningDagRunsResponseSchema(ApiStrictSchema):
    """Response listing running DAG run ids plus the last-seen watermarks."""

    dag_run_ids = fields.List(fields.Int())
    last_seen_dag_run_id = fields.Int(allow_none=True)
    last_seen_log_id = fields.Int(allow_none=True)
class MLAlert(ApiStrictSchema):
    """Schema for an ML alert definition (sensitivity and look-back window)."""

    sensitivity = fields.Float()
    look_back = fields.Int()