예제 #1
0
class AirflowServerInfoSchema(ApiStrictSchema):
    """Status and configuration snapshot of a monitored Airflow server."""

    # Versions and sync state reported by the monitor.
    airflow_version = fields.Str(allow_none=True)
    airflow_export_version = fields.Str(allow_none=True)
    airflow_monitor_version = fields.Str(allow_none=True)
    last_sync_time = fields.DateTime(allow_none=True)
    monitor_error_message = fields.Str(allow_none=True)
    synced_to = fields.DateTime(allow_none=True)
    api_mode = fields.Str(allow_none=True)
    sync_interval = fields.Int(allow_none=True)
    is_sync_enabled = fields.Bool(allow_none=True)
    system_alert_definitions = fields.Dict()
    fetcher = fields.Str(allow_none=True)
    composer_client_id = fields.Str(allow_none=True)
    dag_ids = fields.Str(allow_none=True)

    # Server identity.
    base_url = fields.Str()
    external_url = fields.Str(allow_none=True)
    source_instance_uid = fields.Str(allow_none=True)
    tracking_source_uid = fields.Str()
    airflow_instance_uid = fields.Str(allow_none=True)  # TODO_API: deprecate
    name = fields.Str(allow_none=True)
    env = fields.Str(allow_none=True)
    monitor_status = fields.Str(allow_none=True)
    monitor_config = fields.Nested(MonitorConfigSchema, allow_none=True)
    airflow_environment = fields.Str(allow_none=True)
    last_seen_dag_run_id = fields.Int(allow_none=True)
    last_seen_log_id = fields.Int(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        # Turn the validated payload into the domain object.
        return AirflowServerInfo(**data)
예제 #2
0
class MonitorConfigSchema(ApiStrictSchema):
    """Optional runtime tuning knobs for an Airflow monitor instance."""

    include_sources = fields.Bool(required=False)
    dag_run_bulk_size = fields.Int(required=False)
    start_time_window = fields.Int(required=False)
    log_bytes_from_head = fields.Int(required=False)
    log_bytes_from_end = fields.Int(required=False)
    use_async_tracking = fields.Bool(required=False)
예제 #3
0
class ScheduledJobSchemaV2(Schema):
    """Full serialization of a scheduled job, v2.

    Each field's ``attribute`` points into the ``DbndScheduledJob`` namespace
    of the underlying query result; the trailing ``dump_only`` fields are
    computed/joined values that are serialized out but never loaded back.
    """

    class Meta:
        # marshmallow 2 style: raise on validation errors instead of
        # returning them alongside the data.
        strict = True

    # Core job definition.
    uid = fields.Str(attribute="DbndScheduledJob.uid", allow_none=True)
    name = fields.Str(attribute="DbndScheduledJob.name", required=True)
    cmd = fields.Str(attribute="DbndScheduledJob.cmd", required=True)
    schedule_interval = fields.Str(
        attribute="DbndScheduledJob.schedule_interval", required=True)
    start_date = fields.DateTime(allow_none=True,
                                 attribute="DbndScheduledJob.start_date",
                                 format="iso")
    end_date = fields.DateTime(allow_none=True,
                               attribute="DbndScheduledJob.end_date",
                               format="iso")
    readable_schedule_interval = fields.Str(
        attribute="DbndScheduledJob.readable_schedule_interval",
        allow_none=True)
    scheduled_interval_in_seconds = fields.Integer(
        attribute="DbndScheduledJob.scheduled_interval_in_seconds",
        allow_none=True)
    catchup = fields.Boolean(allow_none=True,
                             attribute="DbndScheduledJob.catchup")
    depends_on_past = fields.Boolean(
        allow_none=True, attribute="DbndScheduledJob.depends_on_past")
    retries = fields.Int(allow_none=True, attribute="DbndScheduledJob.retries")

    # Lifecycle / audit metadata.
    active = fields.Boolean(allow_none=True,
                            attribute="DbndScheduledJob.active")
    create_user = fields.Str(allow_none=True,
                             attribute="DbndScheduledJob.create_user")
    create_time = fields.DateTime(allow_none=True,
                                  attribute="DbndScheduledJob.create_time")
    update_user = fields.Str(allow_none=True,
                             attribute="DbndScheduledJob.update_user")
    update_time = fields.DateTime(allow_none=True,
                                  attribute="DbndScheduledJob.update_time")
    from_file = fields.Boolean(allow_none=True,
                               attribute="DbndScheduledJob.from_file")
    deleted_from_file = fields.Boolean(
        allow_none=True, attribute="DbndScheduledJob.deleted_from_file")
    next_job_date = fields.DateTime(attribute="DbndScheduledJob.next_job_date",
                                    allow_none=True)
    alerts = fields.List(
        fields.Nested(AlertEventSchema),
        attribute="DbndScheduledJob.alerts",
        allow_none=True,
    )

    # Computed/joined values — serialized out only, never loaded.
    job_name = fields.Str(dump_only=True,
                          attribute="DbndScheduledJob.job_name")
    last_run_uid = fields.UUID(dump_only=True)
    last_run_job = fields.Str(dump_only=True)
    last_job_date = fields.DateTime(dump_only=True)
    last_run_state = fields.Str(dump_only=True)
    is_airflow_synced = fields.Bool(dump_only=True)
    list_order = fields.Integer(attribute="DbndScheduledJob.list_order",
                                allow_none=True)
    validation_errors = fields.Str(
        allow_none=True, attribute="DbndScheduledJob.validation_errors")
예제 #4
0
class StructuredDataSchema(ApiStrictSchema):
    """Schema describing the structure of a tracked dataset.

    ``load_from``/``dump_to`` (marshmallow 2) map the wire names
    (``columns``, ``dtypes``, ``size.bytes``) to the attribute names used by
    :class:`DataSchemaArgs`.
    """

    type = fields.String(allow_none=True)
    columns_names = fields.List(fields.String(),
                                load_from="columns",
                                dump_to="columns",
                                allow_none=True)
    columns_types = fields.Dict(load_from="dtypes",
                                dump_to="dtypes",
                                allow_none=True)
    # Shape: Tuple of ints-> (records, columns) -> fields.Tuple() added in marshmallow 3.0.0.
    shape = fields.List(fields.Integer(allow_none=True), allow_none=True)
    byte_size = fields.Integer(load_from="size.bytes",
                               dump_to="size.bytes",
                               allow_none=True)

    @pre_load
    def _migrate_legacy_column_types(self, data: dict, **kwargs) -> dict:
        # Support Snowflake & PostgreSQL data_schemas before dbnd-sdk-0.61.0,
        # which sent "column_types" instead of "columns_types".
        # NOTE: this hook was previously named `pre_load`, shadowing the
        # marshmallow decorator inside the class body; renamed (hook methods
        # are discovered via the decorator tag, not by name, so this is safe).
        # The legacy key is popped so it does not linger in the payload of a
        # strict schema — "column_types" is not a declared field either way.
        columns_types = data.pop("column_types", None)
        if columns_types:
            data["columns_types"] = columns_types
        return data

    @post_load
    def make_object(self, data, **kwargs) -> DataSchemaArgs:
        # `**kwargs` accepts the extra hook arguments (`many`, `partial`)
        # passed by marshmallow 3 — consistent with sibling schemas.
        return DataSchemaArgs(**data)
예제 #5
0
class AirflowServerInfoSchema(_ApiCallSchema):
    """Status report of a monitored Airflow server, as sent over the API."""

    # Server identity and versions.
    base_url = fields.String()
    external_url = fields.String(allow_none=True)
    airflow_instance_uid = fields.String(allow_none=True)
    airflow_version = fields.String(allow_none=True)
    airflow_export_version = fields.String(allow_none=True)
    airflow_monitor_version = fields.String(allow_none=True)
    dags_path = fields.String(allow_none=True)
    logs_path = fields.String(allow_none=True)
    # Monitor-side sync state.
    last_sync_time = fields.DateTime(allow_none=True)
    monitor_status = fields.String(allow_none=True)
    monitor_error_message = fields.String(allow_none=True)
    monitor_start_time = fields.DateTime(allow_none=True)
    synced_from = fields.DateTime(allow_none=True)
    synced_to = fields.DateTime(allow_none=True)
    api_mode = fields.String(allow_none=True)
    sync_interval = fields.Integer(allow_none=True)
    is_sync_enabled = fields.Boolean(allow_none=True)
    fetcher = fields.String(allow_none=True)
    composer_client_id = fields.String(allow_none=True)
    active_dags = fields.Dict(allow_none=True)
    name = fields.String(allow_none=True)
    env = fields.String(allow_none=True)
    # Fetch configuration.
    include_logs = fields.Boolean(allow_none=True)
    include_task_args = fields.Boolean(allow_none=True)
    fetch_quantity = fields.Integer(allow_none=True)
    oldest_incomplete_data_in_days = fields.Integer(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        # Turn the validated payload into the domain object.
        return AirflowServerInfo(**data)
예제 #6
0
class SyncedDatasetMetadataSchema(ApiStrictSchema):
    """Metadata reported for a synced dataset: volume plus structure."""

    # Total size in bytes and number of records in the dataset.
    num_bytes = fields.Integer()
    records = fields.Integer()

    schema = fields.Nested(StructuredDataSchema)

    # TODO: ADD -> Preview

    @post_load
    def make_object(self, data, **kwargs):
        # `**kwargs` accepts the extra hook arguments (`many`, `partial`)
        # passed by marshmallow 3 — consistent with the other schemas here,
        # which all declare their post_load hooks as (self, data, **kwargs).
        return SyncedDatasetMetadata(**data)
예제 #7
0
class UpdateDagRunsRequestSchema(ApiStrictSchema):
    """Payload the monitor sends to push dag-run / task-instance updates."""

    dag_runs = fields.Nested(DagRunSchema, many=True)
    task_instances = fields.Nested(TaskInstanceSchema, many=True)
    last_seen_log_id = fields.Int(allow_none=True)

    airflow_export_meta = fields.Nested(AirflowExportMetaSchema, required=False)
    error_message = fields.Str(allow_none=True, required=False)
    syncer_type = fields.Str(allow_none=True)
예제 #8
0
File: tracking_api.py  Project: Dtchil/dbnd
class AirflowTaskContextSchema(ApiObjectSchema):
    """Identifies one Airflow task attempt (dag, task, execution date)."""

    dag_id = fields.Str()
    execution_date = fields.Str()
    task_id = fields.Str()
    try_number = fields.Int(allow_none=True)

    @post_load
    def make_run_info(self, data, **kwargs):
        # Turn the validated payload into the in-memory context object.
        return AirflowTaskContext(**data)
예제 #9
0
class LogTargetMetricsSchema(_ApiCallSchema):
    """Metrics logged for a single target touched by a task-run attempt."""

    # Identity of the producing run attempt.
    task_run_uid = fields.UUID(required=True)
    task_run_attempt_uid = fields.UUID(required=True)

    target_path = fields.Str()

    # Optional data profile of the target's value.
    value_preview = fields.Str(allow_none=True)
    data_dimensions = fields.List(fields.Int(), allow_none=True)
    data_schema = fields.Str(allow_none=True)
예제 #10
0
class DagRunSchema(ApiStrictSchema):
    """State of a single Airflow DAG run as reported by the monitor."""

    dag_id = fields.Str(allow_none=True)
    run_id = fields.Str(required=False)
    dagrun_id = fields.Int()
    start_date = fields.DateTime(allow_none=True)
    state = fields.Str()
    end_date = fields.DateTime(allow_none=True)
    execution_date = fields.DateTime(allow_none=True)
    task_args = fields.Dict()
예제 #11
0
class TaskInstanceSchema(ApiStrictSchema):
    """State of one Airflow task instance attempt within a DAG run."""

    execution_date = fields.DateTime()
    dag_id = fields.Str()
    state = fields.Str(allow_none=True)
    try_number = fields.Int()
    task_id = fields.Str()
    start_date = fields.DateTime(allow_none=True)
    end_date = fields.DateTime(allow_none=True)
    log_body = fields.Str(allow_none=True)
    xcom_dict = fields.Dict()
예제 #12
0
File: tracking_api.py  Project: Dtchil/dbnd
class AirflowTaskInfoSchema(_ApiCallSchema):
    """Sync payload describing one Airflow task attempt."""

    execution_date = fields.DateTime()
    last_sync = fields.DateTime(allow_none=True)
    dag_id = fields.Str()
    task_id = fields.Str()
    task_run_attempt_uid = fields.UUID()
    retry_number = fields.Int(allow_none=True, required=False)

    @post_load
    def make_object(self, data, **kwargs):
        # Turn the validated payload into the domain object.
        return AirflowTaskInfo(**data)
예제 #13
0
class SyncedTransactionOperationSchema(ApiStrictSchema):
    """One dataset read/write operation observed in a synced transaction."""

    op_type = fields.String()
    started_date = fields.DateTime()  # type: datetime
    records_count = fields.Integer()

    dataset_uri = fields.String()
    dataset_uid = fields.UUID(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        # `**kwargs` accepts the extra hook arguments (`many`, `partial`)
        # passed by marshmallow 3 — consistent with the sibling schemas,
        # which all declare post_load hooks as (self, data, **kwargs).
        return SyncedTransactionOperation(**data)
예제 #14
0
File: common.py  Project: turbaszek/dbnd
class MetricSchema(ApiObjectSchema):
    """A single tracked metric: a key plus typed value slots and a timestamp."""

    key = fields.Str()
    value = fields.Str(allow_none=True)
    value_int = fields.Int(allow_none=True)
    value_float = fields.Float(allow_none=True)
    timestamp = fields.DateTime()

    @post_load
    def make_object(self, data, **kwargs):
        # Local import — presumably deferred to avoid an import cycle at
        # module load time; confirm against the module's import graph.
        from dbnd._core.tracking.metrics import Metric

        return Metric(**data)
예제 #15
0
File: common.py  Project: databand-ai/dbnd
class MetricSchema(ApiStrictSchema):
    """A single tracked metric with one slot per possible value type."""

    key = fields.Str()
    value = fields.Raw(allow_none=True)
    value_str = fields.Str(allow_none=True)
    value_json = fields.Raw(allow_none=True)
    value_int = fields.Int(allow_none=True)
    value_float = fields.Float(allow_none=True)
    timestamp = fields.DateTime()

    @post_load
    def make_object(self, data, **kwargs):
        # Turn the validated payload into the Metric domain object.
        return Metric(**data)
예제 #16
0
class TaskSchema(ApiStrictSchema):
    """Static definition of one Airflow task and its DAG neighborhood."""

    # Graph edges within the DAG.
    upstream_task_ids = fields.List(fields.Str())
    downstream_task_ids = fields.List(fields.Str())
    # Task definition and source tracking.
    task_type = fields.Str()
    task_source_code = fields.Str(allow_none=True)
    task_source_hash = fields.Str(allow_none=True)
    task_module_code = fields.Str(allow_none=True)
    module_source_hash = fields.Str(allow_none=True)
    dag_id = fields.Str()
    task_id = fields.Str()
    retries = fields.Int()
    command = fields.Str(allow_none=True)
    task_args = fields.Dict()
예제 #17
0
File: tracking_api.py  Project: Dtchil/dbnd
class ScheduledJobInfoSchema(ApiObjectSchema):
    """Serialized scheduled-job record exchanged over the tracking API."""

    # Core job definition.
    uid = fields.UUID()
    name = fields.String()
    cmd = fields.String()
    start_date = fields.DateTime()
    create_user = fields.String()
    create_time = fields.DateTime()
    end_date = fields.DateTime(allow_none=True)
    schedule_interval = fields.String(allow_none=True)
    catchup = fields.Boolean(allow_none=True)
    depends_on_past = fields.Boolean(allow_none=True)
    retries = fields.Integer(allow_none=True)
    # Lifecycle / audit metadata.
    active = fields.Boolean(allow_none=True)
    update_user = fields.String(allow_none=True)
    update_time = fields.DateTime(allow_none=True)
    from_file = fields.Boolean(allow_none=True)
    deleted_from_file = fields.Boolean(allow_none=True)
    list_order = fields.Integer(allow_none=True)
    job_name = fields.String(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        # Turn the validated payload into the domain object.
        return ScheduledJobInfo(**data)
예제 #18
0
File: tracking_api.py  Project: Dtchil/dbnd
class LogTargetSchema(_ApiCallSchema):
    """Tracking payload describing one target (input/output) of a task run."""

    # Identity of the run / task run / attempt that touched the target.
    run_uid = fields.UUID(required=True)
    task_run_uid = fields.UUID(required=True)
    task_run_name = fields.String()
    task_run_attempt_uid = fields.UUID(required=True)

    # What was accessed and how (read/write + success/failure enums).
    target_path = fields.String()
    param_name = fields.String(allow_none=True)
    task_def_uid = fields.UUID(allow_none=True)
    operation_type = EnumField(DbndTargetOperationType)
    operation_status = EnumField(DbndTargetOperationStatus)

    # Optional data profile of the target's value.
    value_preview = fields.String(allow_none=True)
    data_dimensions = fields.List(fields.Integer(), allow_none=True)
    data_schema = fields.String(allow_none=True)
    data_hash = fields.String(allow_none=True)
예제 #19
0
class JobFromFileSchema(Schema):
    """Scheduled-job definition as parsed from a job file."""

    class Meta:
        # marshmallow 2 style: raise on validation errors.
        strict = True

    # Mandatory identity and scheduling fields.
    name = fields.String(required=True)
    cmd = fields.String(required=True)
    schedule_interval = fields.String(required=True)
    start_date = fields.DateTime(required=True, allow_none=False, format="iso")
    owner = fields.String(allow_none=False)

    end_date = fields.DateTime(format="iso", allow_none=True)
    depends_on_past = fields.Bool(allow_none=True)

    catchup = fields.Bool(allow_none=True)
    retries = fields.Integer(allow_none=True)

    list_order = fields.Integer(allow_none=True)
    active = fields.Bool(allow_none=True)
예제 #20
0
class AirflowServerInfoSchema(_ApiCallSchema):
    """Status report of a monitored Airflow server (older field set)."""

    # Server identity and versions.
    base_url = fields.String()
    airflow_version = fields.String(allow_none=True)
    airflow_export_version = fields.String(allow_none=True)
    airflow_monitor_version = fields.String(allow_none=True)
    dags_path = fields.String(allow_none=True)
    logs_path = fields.String(allow_none=True)
    # Monitor-side sync state.
    last_sync_time = fields.DateTime(allow_none=True)
    monitor_status = fields.String(allow_none=True)
    monitor_error_message = fields.String(allow_none=True)
    monitor_start_time = fields.DateTime(allow_none=True)
    synced_from = fields.DateTime(allow_none=True)
    synced_to = fields.DateTime(allow_none=True)
    rbac_enabled = fields.Boolean(allow_none=True)
    sync_interval = fields.Integer(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        # Turn the validated payload into the domain object.
        return AirflowServerInfo(**data)
예제 #21
0
class ColumnStatsSchema(ApiStrictSchema):
    """Per-column statistics of a tracked dataset.

    Loads into :class:`ColumnStatsArgs`; ``None``-valued entries are stripped
    from the dumped payload.
    """

    column_name = fields.String(required=True)
    column_type = fields.String(required=True)

    records_count = fields.Integer(required=True)
    distinct_count = fields.Integer(required=False, allow_none=True)
    null_count = fields.Integer(required=False, allow_none=True)

    # Metric for non-numeric column type
    unique_count = fields.Integer(required=False, allow_none=True)
    # Most frequent value - load_from attrs to support backward compatibility with SDK < 0.59.0
    most_freq_value = fields.Raw(required=False,
                                 allow_none=True,
                                 load_from="top_value")
    most_freq_value_count = fields.Integer(required=False,
                                           allow_none=True,
                                           load_from="top_freq_count")

    # Metric for numeric column type
    mean_value = fields.Float(required=False, allow_none=True)
    min_value = fields.Float(required=False, allow_none=True)
    max_value = fields.Float(required=False, allow_none=True)
    std_value = fields.Float(required=False, allow_none=True)
    # Percentiles
    quartile_1 = fields.Float(required=False, allow_none=True)
    quartile_2 = fields.Float(required=False, allow_none=True)
    quartile_3 = fields.Float(required=False, allow_none=True)

    # Hybrid properties - We only dump them so we don't load them back to ColumnStatsArgs when we make_object
    # We don't save them in DB, and they are only computed in ColumnStatsArgs
    non_null_count = fields.Integer(required=False,
                                    allow_none=True,
                                    dump_only=True)
    null_percent = fields.Float(required=False,
                                allow_none=True,
                                dump_only=True)

    @post_load
    def make_object(self, data: dict, **kwargs) -> ColumnStatsArgs:
        # `**kwargs` accepts the extra hook arguments (`many`, `partial`)
        # passed by marshmallow 3 — consistent with sibling schemas.
        return ColumnStatsArgs(**data)

    @post_dump
    def dump_object(self, data: dict, **kwargs) -> dict:
        # Drop None entries so optional metrics don't bloat the payload.
        return {k: v for k, v in data.items() if v is not None}
예제 #22
0
class UpdateLastSeenValuesRequestSchema(ApiStrictSchema):
    """Request advancing the monitor's last-seen sync cursors."""

    last_seen_dag_run_id = fields.Int()
    last_seen_log_id = fields.Int()
예제 #23
0
class JobsSetArchiveSchema(ApiStrictSchema):
    """Request to archive or unarchive a batch of jobs by id."""

    # At least one job id must be supplied.
    ids = fields.List(
        fields.Int(), required=True, validate=validate.Length(min=1)
    )
    is_archived = fields.Bool(required=True)
예제 #24
0
class GetRunningDagRunsResponseSchema(ApiStrictSchema):
    """Response listing running dag runs plus the current sync cursors."""

    dag_run_ids = fields.List(fields.Int())
    last_seen_dag_run_id = fields.Int(allow_none=True)
    last_seen_log_id = fields.Int(allow_none=True)
예제 #25
0
class MLAlert(ApiStrictSchema):
    # Alert configuration parameters; exact semantics are defined by the
    # consumer and not visible here.
    # NOTE(review): unlike sibling classes this schema lacks the "Schema"
    # suffix; renaming would break callers, so it is left as-is.
    sensitivity = fields.Float()
    look_back = fields.Integer()