Example #1
0
class StructuredDataSchema(ApiStrictSchema):
    """Schema whose loaded payload is turned into a ``DataSchemaArgs`` object.

    ``columns_names``, ``columns_types`` and ``byte_size`` are exchanged under
    the external keys ``columns``, ``dtypes`` and ``size.bytes`` respectively
    (see the ``load_from`` / ``dump_to`` arguments below).
    """

    type = fields.String(allow_none=True)
    columns_names = fields.List(fields.String(),
                                load_from="columns",
                                dump_to="columns",
                                allow_none=True)
    columns_types = fields.Dict(load_from="dtypes",
                                dump_to="dtypes",
                                allow_none=True)
    # Shape: Tuple of ints-> (records, columns) -> fields.Tuple() added in marshmallow 3.0.0.
    shape = fields.List(fields.Integer(allow_none=True), allow_none=True)
    byte_size = fields.Integer(load_from="size.bytes",
                               dump_to="size.bytes",
                               allow_none=True)

    # The hook below shares its name with the ``pre_load`` decorator. This is
    # safe: the decorator expression is evaluated before the method name is
    # bound in the class namespace.
    @pre_load
    def pre_load(self, data: dict, **kwargs) -> dict:
        """Copy a truthy legacy ``column_types`` entry to ``columns_types``.

        :param data: raw payload about to be deserialized; modified in place.
        :param kwargs: extra hook arguments (ignored); accepted so the hook
            has the same signature as the other schemas' hooks and tolerates
            marshmallow versions that pass ``many`` / ``partial``.
        :return: the same ``data`` mapping.
        """
        # Support Snowflake & PostgreSQL data_schemas before dbnd-sdk-0.61.0
        columns_types = data.get("column_types")
        if columns_types:
            data["columns_types"] = columns_types
        return data

    @post_load
    def make_object(self, data, **kwargs) -> DataSchemaArgs:
        """Build a ``DataSchemaArgs`` from the deserialized field values.

        :param data: mapping of field name to loaded value.
        :param kwargs: extra hook arguments (ignored).
        """
        return DataSchemaArgs(**data)
Example #2
0
class TaskRunParamSchema(ApiObjectSchema):
    """Three string fields that are loaded into a ``TaskRunParamInfo``."""

    parameter_name = fields.String()
    value_origin = fields.String()
    value = fields.String()

    @post_load
    def make_task_run_param(self, data, **kwargs):
        """Return a ``TaskRunParamInfo`` built from the loaded field values."""
        param_info = TaskRunParamInfo(**data)
        return param_info
Example #3
0
class UpdateDagRunsRequestSchema(ApiStrictSchema):
    """Schema carrying lists of dag runs and task instances plus sync metadata.

    NOTE(review): presumably the body of an "update dag runs" request, judging
    by the class name only - confirm against the code that uses it.
    """

    # many=True: each of these is a list of nested objects.
    dag_runs = fields.Nested(DagRunSchema, many=True)
    task_instances = fields.Nested(TaskInstanceSchema, many=True)
    last_seen_log_id = fields.Integer(allow_none=True)

    # Optional extras: neither field has to be present in the payload.
    airflow_export_meta = fields.Nested(AirflowExportMetaSchema, required=False)
    error_message = fields.String(required=False, allow_none=True)
    syncer_type = fields.String(allow_none=True)
Example #4
0
class AirflowTaskContextSchema(ApiObjectSchema):
    """Dag/task identifiers that are loaded into an ``AirflowTaskContext``."""

    dag_id = fields.String()
    # Declared as a plain string, not as a DateTime field.
    execution_date = fields.String()
    task_id = fields.String()
    try_number = fields.Integer(allow_none=True)

    @post_load
    def make_run_info(self, data, **kwargs):
        """Return an ``AirflowTaskContext`` built from the loaded field values."""
        task_context = AirflowTaskContext(**data)
        return task_context
Example #5
0
class DagRunSchema(ApiStrictSchema):
    """Field declarations for a single dag run record.

    ``dag_id`` and the three date fields accept ``None``; ``run_id`` is the
    only field explicitly marked ``required=False``.
    """

    dag_id = fields.String(allow_none=True)
    run_id = fields.String(required=False)
    dagrun_id = fields.Integer()
    start_date = fields.DateTime(allow_none=True)
    state = fields.String()
    end_date = fields.DateTime(allow_none=True)
    execution_date = fields.DateTime(allow_none=True)
    # Free-form dictionary; no key/value types are declared here.
    task_args = fields.Dict()
Example #6
0
class LogTargetMetricsSchema(_ApiCallSchema):
    """Schema identifying a task run attempt and describing a target's value.

    Both UID fields are mandatory; the preview, dimensions and schema fields
    accept ``None``.
    """

    task_run_uid = fields.UUID(required=True)
    task_run_attempt_uid = fields.UUID(required=True)

    target_path = fields.String()

    value_preview = fields.String(allow_none=True)
    # List of integers (the list itself may be None).
    data_dimensions = fields.List(fields.Integer(), allow_none=True)
    # Transported as a string here, not as a nested schema.
    data_schema = fields.String(allow_none=True)
Example #7
0
class ScheduledRunInfoSchema(ApiObjectSchema):
    """Nullable scheduled-job fields, loaded through ``_as_dotted_dict``."""

    scheduled_job_uid = fields.UUID(allow_none=True)
    scheduled_date = fields.DateTime(allow_none=True)
    scheduled_job_dag_run_id = fields.String(allow_none=True)
    scheduled_job_name = fields.String(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        """Return ``_as_dotted_dict`` applied to the loaded field values."""
        run_info = _as_dotted_dict(**data)
        return run_info
Example #8
0
class BaseSourceMonitorStateSchema(ApiStrictSchema):
    """Optional monitor state fields, loaded into a ``BaseSourceMonitorState``.

    Every field is both optional (``required=False``) and nullable.
    """

    monitor_version = fields.String(required=False, allow_none=True)
    monitor_status = fields.String(required=False, allow_none=True)
    monitor_error_message = fields.String(required=False, allow_none=True)
    monitor_start_time = fields.DateTime(required=False, allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        """Build a ``BaseSourceMonitorState`` from the loaded field values.

        :param data: mapping of field name to loaded value.
        :param kwargs: extra hook arguments (ignored); accepted so the hook
            matches the other schemas' ``make_object(self, data, **kwargs)``
            signature and tolerates ``many`` / ``partial`` keyword arguments.
        """
        return BaseSourceMonitorState(**data)
Example #9
0
class MLSaveModelSchema(_ApiCallSchema):
    """Model payload plus job metadata, loaded into an ``MLSaveModel``."""

    # Original author's note: "pickle".
    # NOTE(review): the field itself is declared as a Dict - confirm the format.
    model = fields.Dict()
    job_name = fields.String()
    is_enabled = fields.Boolean(allow_none=True)
    # Original author's open question: "df?"
    data_set = fields.String(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        """Return an ``MLSaveModel`` built from the loaded field values."""
        saved_model = MLSaveModel(**data)
        return saved_model
Example #10
0
class TaskInstanceSchema(ApiStrictSchema):
    """Field declarations for a single task instance record.

    ``state``, ``start_date``, ``end_date`` and ``log_body`` accept ``None``.
    """

    execution_date = fields.DateTime()
    dag_id = fields.String()
    state = fields.String(allow_none=True)
    try_number = fields.Integer()
    task_id = fields.String()
    start_date = fields.DateTime(allow_none=True)
    end_date = fields.DateTime(allow_none=True)
    log_body = fields.String(allow_none=True)
    # Free-form dictionary; no key/value types are declared here.
    xcom_dict = fields.Dict()
Example #11
0
class TargetInfoSchema(ApiObjectSchema):
    """Target path and provenance fields, loaded into a ``TargetInfo``."""

    parameter_name = fields.String()

    path = fields.String()
    created_date = fields.DateTime(allow_none=True)
    task_run_uid = fields.UUID(allow_none=True)

    @post_load
    def make_target(self, data, **kwargs):
        """Return a ``TargetInfo`` built from the loaded field values."""
        target_info = TargetInfo(**data)
        return target_info
Example #12
0
class AirflowTaskInfoSchema(_ApiCallSchema):
    """Airflow task identifiers and sync data, loaded into ``AirflowTaskInfo``."""

    execution_date = fields.DateTime()
    last_sync = fields.DateTime(allow_none=True)
    dag_id = fields.String()
    task_id = fields.String()
    task_run_attempt_uid = fields.UUID()
    # Optional and nullable.
    retry_number = fields.Integer(required=False, allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        """Return an ``AirflowTaskInfo`` built from the loaded field values."""
        task_info = AirflowTaskInfo(**data)
        return task_info
Example #13
0
class SyncedTransactionOperationSchema(ApiStrictSchema):
    """Fields of one operation, loaded into a ``SyncedTransactionOperation``."""

    op_type = fields.String()
    started_date = fields.DateTime()  # type: datetime
    records_count = fields.Integer()

    dataset_uri = fields.String()
    dataset_uid = fields.UUID(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        """Build a ``SyncedTransactionOperation`` from the loaded field values.

        :param data: mapping of field name to loaded value.
        :param kwargs: extra hook arguments (ignored); accepted so the hook
            matches the other schemas' ``make_object(self, data, **kwargs)``
            signature and tolerates ``many`` / ``partial`` keyword arguments.
        """
        return SyncedTransactionOperation(**data)
Example #14
0
class MetricSchema(ApiStrictSchema):
    """Metric key, timestamp and several value slots, loaded into ``Metric``."""

    key = fields.String()
    # The value is declared in several typed variants; all of them accept None.
    value = fields.Raw(allow_none=True)
    value_str = fields.String(allow_none=True)
    value_json = fields.Raw(allow_none=True)
    value_int = fields.Integer(allow_none=True)
    value_float = fields.Float(allow_none=True)
    timestamp = fields.DateTime()

    @post_load
    def make_object(self, data, **kwargs):
        """Return a ``Metric`` built from the loaded field values."""
        metric = Metric(**data)
        return metric
Example #15
0
class SyncedTransactionsReportSchema(ApiStrictSchema):
    """Sync event header plus a list of transactions.

    Loaded data is turned into a ``SyncedTransactionsReport``.
    """

    sync_event_uid = fields.UUID()
    sync_event_timestamp = fields.DateTime()
    source_type = fields.String(allow_none=True)  # bigquery / snowflake / etc
    syncer_type = fields.String(allow_none=True)

    monitor_state = fields.Nested(DatasourceMonitorStateSchema)
    # many=True: a list of nested transaction objects.
    transactions = fields.Nested(SyncedTransactionSchema, many=True)

    @post_load
    def make_object(self, data, **kwargs):
        """Build a ``SyncedTransactionsReport`` from the loaded field values.

        :param data: mapping of field name to loaded value.
        :param kwargs: extra hook arguments (ignored); accepted so the hook
            matches the other schemas' ``make_object(self, data, **kwargs)``
            signature and tolerates ``many`` / ``partial`` keyword arguments.
        """
        return SyncedTransactionsReport(**data)
Example #16
0
class DatasetsReportSchema(ApiStrictSchema):
    """Sync event header plus a list of datasets.

    Loaded data is turned into a ``DatasetsReport``.
    """

    sync_event_uid = fields.UUID()
    monitor_state = fields.Nested(DatasourceMonitorStateSchema)
    source_type = fields.String(allow_none=True)  # bigquery / snowflake / etc
    syncer_type = fields.String(allow_none=True)

    # Optional and nullable in this schema.
    sync_event_timestamp = fields.DateTime(required=False, allow_none=True)
    # many=True: a list of nested dataset objects.
    datasets = fields.Nested(SyncedDatasetSchema, many=True)

    @post_load
    def make_object(self, data, **kwargs):
        """Build a ``DatasetsReport`` from the loaded field values.

        :param data: mapping of field name to loaded value.
        :param kwargs: extra hook arguments (ignored); accepted so the hook
            matches the other schemas' ``make_object(self, data, **kwargs)``
            signature and tolerates ``many`` / ``partial`` keyword arguments.
        """
        return DatasetsReport(**data)
Example #17
0
class MetricSchema(ApiObjectSchema):
    """Metric key, timestamp and typed value slots, loaded into ``Metric``."""

    key = fields.String()
    # All value variants accept None.
    value = fields.String(allow_none=True)
    value_int = fields.Integer(allow_none=True)
    value_float = fields.Float(allow_none=True)
    timestamp = fields.DateTime()

    @post_load
    def make_object(self, data, **kwargs):
        """Return a ``Metric`` built from the loaded field values."""
        # Function-scope import: ``Metric`` is resolved only when data is loaded.
        # NOTE(review): presumably avoids an import cycle - confirm.
        from dbnd._core.tracking.metrics import Metric

        metric = Metric(**data)
        return metric
Example #18
0
class ErrorInfoSchema(ApiObjectSchema):
    """Error message, traceback and flag fields, loaded into an ``ErrorInfo``."""

    msg = fields.String()
    help_msg = fields.String(allow_none=True)
    databand_error = fields.Bool()
    # Serialized as ``str(obj.exc_type)``; no deserialize function is given.
    exc_type = fields.Function(lambda obj: str(obj.exc_type), allow_none=True)
    traceback = fields.String()
    nested = fields.String(allow_none=True)
    user_code_traceback = fields.String()
    show_exc_info = fields.Bool()

    @post_load
    def make_object(self, data, **kwargs):
        """Return an ``ErrorInfo`` built from the loaded field values."""
        error_info = ErrorInfo(**data)
        return error_info
Example #19
0
class TaskSchema(ApiStrictSchema):
    """Field declarations describing one task: ids, links, source and args.

    The source/hash fields and ``command`` accept ``None``.
    """

    # Lists of task id strings.
    upstream_task_ids = fields.List(fields.String())
    downstream_task_ids = fields.List(fields.String())
    task_type = fields.String()
    task_source_code = fields.String(allow_none=True)
    task_source_hash = fields.String(allow_none=True)
    task_module_code = fields.String(allow_none=True)
    module_source_hash = fields.String(allow_none=True)
    dag_id = fields.String()
    task_id = fields.String()
    retries = fields.Integer()
    command = fields.String(allow_none=True)
    # Free-form dictionary; no key/value types are declared here.
    task_args = fields.Dict()
Example #20
0
class SyncedTransactionSchema(ApiStrictSchema):
    """One transaction with its operations, loaded into ``SyncedTransaction``."""

    datasource_transaction_id = fields.String()
    created_date = fields.DateTime()
    started_date = fields.DateTime()
    ended_date = fields.DateTime()

    # One nested write operation and a nullable list of read operations.
    write_operation = fields.Nested(SyncedTransactionOperationSchema)
    read_operations = fields.Nested(
        SyncedTransactionOperationSchema, many=True, allow_none=True
    )

    query_string = fields.String(allow_none=True)
    data_schema = fields.Nested(StructuredDataSchema, allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        """Build a ``SyncedTransaction`` from the loaded field values.

        :param data: mapping of field name to loaded value.
        :param kwargs: extra hook arguments (ignored); accepted so the hook
            matches the other schemas' ``make_object(self, data, **kwargs)``
            signature and tolerates ``many`` / ``partial`` keyword arguments.
        """
        return SyncedTransaction(**data)
Example #21
0
class RootRunInfoSchema(ApiObjectSchema):
    """Root run identifiers and URL, loaded through ``_as_dotted_dict``."""

    root_run_uid = fields.UUID()
    root_task_run_uid = fields.UUID(allow_none=True)
    root_run_url = fields.String(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        """Return ``_as_dotted_dict`` applied to the loaded field values."""
        root_run_info = _as_dotted_dict(**data)
        return root_run_info
Example #22
0
class ColumnStatsSchema(ApiStrictSchema):
    """Per-column statistics, loaded into ``ColumnStatsArgs``.

    ``column_name``, ``column_type`` and ``records_count`` are mandatory; all
    other statistics are optional and nullable. Dumped output omits every key
    whose value is ``None`` (see ``dump_object``).
    """

    column_name = fields.String(required=True)
    column_type = fields.String(required=True)

    records_count = fields.Integer(required=True)
    distinct_count = fields.Integer(required=False, allow_none=True)
    null_count = fields.Integer(required=False, allow_none=True)

    # Metric for non-numeric column type
    unique_count = fields.Integer(required=False, allow_none=True)
    # Most frequent value - load_from attrs to support backward compatibility with SDK < 0.59.0
    most_freq_value = fields.Raw(required=False,
                                 allow_none=True,
                                 load_from="top_value")
    most_freq_value_count = fields.Integer(required=False,
                                           allow_none=True,
                                           load_from="top_freq_count")

    # Metric for numeric column type
    mean_value = fields.Float(required=False, allow_none=True)
    min_value = fields.Float(required=False, allow_none=True)
    max_value = fields.Float(required=False, allow_none=True)
    std_value = fields.Float(required=False, allow_none=True)
    # Percentiles
    quartile_1 = fields.Float(required=False, allow_none=True)
    quartile_2 = fields.Float(required=False, allow_none=True)
    quartile_3 = fields.Float(required=False, allow_none=True)

    # Hybrid properties - We only dump them so we don't load them back to ColumnStatsArgs when we make_object
    # We don't save them in DB, and they are only computed in ColumnStatsArgs
    non_null_count = fields.Integer(required=False,
                                    allow_none=True,
                                    dump_only=True)
    null_percent = fields.Float(required=False,
                                allow_none=True,
                                dump_only=True)

    @post_load
    def make_object(self, data: dict, **kwargs) -> ColumnStatsArgs:
        """Build a ``ColumnStatsArgs`` from the loaded field values.

        :param data: mapping of field name to loaded value.
        :param kwargs: extra hook arguments (ignored); accepted so the hook
            matches the other schemas' ``make_object(self, data, **kwargs)``
            signature and tolerates ``many`` / ``partial`` keyword arguments.
        """
        return ColumnStatsArgs(**data)

    @post_dump
    def dump_object(self, data: dict, **kwargs) -> dict:
        """Return the dumped mapping without the entries whose value is ``None``.

        :param data: serialized mapping produced by the schema.
        :param kwargs: extra hook arguments (ignored).
        """
        return {k: v for k, v in data.items() if v is not None}
Example #23
0
class SyncedDatasetSchema(ApiStrictSchema):
    """Dataset URI, dates and nested metadata, loaded into ``SyncedDataset``."""

    uri = fields.String()  # {storage_type://region/project_id/scheme_name/table_name}
    created_date = fields.DateTime()
    last_modified_date = fields.DateTime(allow_none=True)

    metadata = fields.Nested(SyncedDatasetMetadataSchema)

    uid = fields.UUID(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        """Build a ``SyncedDataset`` from the loaded field values.

        :param data: mapping of field name to loaded value.
        :param kwargs: extra hook arguments (ignored); accepted so the hook
            matches the other schemas' ``make_object(self, data, **kwargs)``
            signature and tolerates ``many`` / ``partial`` keyword arguments.
        """
        return SyncedDataset(**data)
Example #24
0
class TaskDefinitionParamSchema(ApiObjectSchema):
    """
    Schema modelled on the TaskDefinitionParam object.

    Loaded data is returned through ``_as_dotted_dict``.
    """

    name = fields.String()

    default = fields.String(allow_none=True)

    description = fields.String()

    # Enum-backed fields.
    group = EnumField(ParameterGroup)
    kind = EnumField(_ParameterKind)
    load_on_build = fields.Boolean()

    significant = fields.Boolean()
    value_type = fields.String()

    @post_load
    def make_task_definition_param(self, data, **kwargs):
        """Return ``_as_dotted_dict`` applied to the loaded field values."""
        param_definition = _as_dotted_dict(**data)
        return param_definition
Example #25
0
class UpdateAirflowMonitorStateRequestSchema(ApiStrictSchema):
    """Schema of version, status and instance fields for an Airflow monitor.

    Every field is optional (``required=False``) and accepts ``None``.
    NOTE(review): presumably a partial-update request body, judging by the
    class name only - confirm against the code that uses it.
    """

    airflow_version = fields.String(required=False, allow_none=True)
    airflow_export_version = fields.String(required=False, allow_none=True)
    airflow_monitor_version = fields.String(required=False, allow_none=True)
    monitor_status = fields.String(required=False, allow_none=True)
    monitor_error_message = fields.String(required=False, allow_none=True)
    airflow_instance_uid = fields.UUID(required=False, allow_none=True)
    api_mode = fields.String(required=False, allow_none=True)
Example #26
0
class AirflowServerInfoSchema(_ApiCallSchema):
    """Airflow server details, loaded into an ``AirflowServerInfo``.

    Every field except ``base_url`` accepts ``None``.
    """

    base_url = fields.String()
    # Version strings.
    airflow_version = fields.String(allow_none=True)
    airflow_export_version = fields.String(allow_none=True)
    airflow_monitor_version = fields.String(allow_none=True)
    # Filesystem locations.
    dags_path = fields.String(allow_none=True)
    logs_path = fields.String(allow_none=True)
    last_sync_time = fields.DateTime(allow_none=True)
    # Monitor state.
    monitor_status = fields.String(allow_none=True)
    monitor_error_message = fields.String(allow_none=True)
    monitor_start_time = fields.DateTime(allow_none=True)
    # Synced time window.
    synced_from = fields.DateTime(allow_none=True)
    synced_to = fields.DateTime(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        """Return an ``AirflowServerInfo`` built from the loaded field values."""
        server_info = AirflowServerInfo(**data)
        return server_info
Example #27
0
class TaskDefinitionInfoSchema(ApiObjectSchema):
    """Task definition fields, loaded into a ``TaskDefinitionInfo``."""

    task_definition_uid = fields.UUID()
    name = fields.String()

    class_version = fields.String()
    family = fields.String()

    # Module-level source text and its hash (both nullable).
    module_source = fields.String(allow_none=True)
    module_source_hash = fields.String(allow_none=True)

    # Source text and its hash (both nullable).
    source = fields.String(allow_none=True)
    source_hash = fields.String(allow_none=True)

    type = fields.String()

    # many=True: a list of nested parameter definitions.
    task_param_definitions = fields.Nested(TaskDefinitionParamSchema, many=True)

    @post_load
    def make_task_definition(self, data, **kwargs):
        """Return a ``TaskDefinitionInfo`` built from the loaded field values."""
        task_definition = TaskDefinitionInfo(**data)
        return task_definition
Example #28
0
class TaskRunEnvInfoSchema(ApiObjectSchema):
    """Run environment fields, loaded into a ``TaskRunEnvInfo``."""

    uid = fields.UUID()
    cmd_line = fields.String()

    user = fields.String()
    machine = fields.String()
    databand_version = fields.String()

    # User code version data.
    user_code_version = fields.String()
    user_code_committed = fields.Boolean()
    project_root = fields.String()

    user_data = fields.String()

    heartbeat = fields.DateTime()

    @post_load
    def make_object(self, data, **kwargs):
        """Return a ``TaskRunEnvInfo`` built from the loaded field values."""
        env_info = TaskRunEnvInfo(**data)
        return env_info
Example #29
0
class LogTargetSchema(_ApiCallSchema):
    """Schema identifying a run/task run attempt and describing a target operation.

    The three ``*_uid`` fields marked ``required=True`` must be present; the
    parameter name, task definition UID and all value/data fields accept
    ``None``.
    """

    run_uid = fields.UUID(required=True)
    task_run_uid = fields.UUID(required=True)
    task_run_name = fields.String()
    task_run_attempt_uid = fields.UUID(required=True)

    target_path = fields.String()
    param_name = fields.String(allow_none=True)
    task_def_uid = fields.UUID(allow_none=True)
    # Enum-backed fields for the operation's type and status.
    operation_type = EnumField(DbndTargetOperationType)
    operation_status = EnumField(DbndTargetOperationStatus)

    value_preview = fields.String(allow_none=True)
    # List of integers (the list itself may be None).
    data_dimensions = fields.List(fields.Integer(), allow_none=True)
    # Transported as a string here, not as a nested schema.
    data_schema = fields.String(allow_none=True)
    data_hash = fields.String(allow_none=True)
Example #30
0
class ScheduledJobInfoSchema(ApiObjectSchema):
    """Scheduled job fields, loaded into a ``ScheduledJobInfo``.

    Fields from ``end_date`` down to ``job_name`` accept ``None``; the first
    six do not declare ``allow_none``.
    """

    uid = fields.UUID()
    name = fields.String()
    cmd = fields.String()
    start_date = fields.DateTime()
    create_user = fields.String()
    create_time = fields.DateTime()
    # Nullable fields start here.
    end_date = fields.DateTime(allow_none=True)
    schedule_interval = fields.String(allow_none=True)
    catchup = fields.Boolean(allow_none=True)
    depends_on_past = fields.Boolean(allow_none=True)
    retries = fields.Integer(allow_none=True)
    active = fields.Boolean(allow_none=True)
    update_user = fields.String(allow_none=True)
    update_time = fields.DateTime(allow_none=True)
    from_file = fields.Boolean(allow_none=True)
    deleted_from_file = fields.Boolean(allow_none=True)
    list_order = fields.Integer(allow_none=True)
    job_name = fields.String(allow_none=True)

    @post_load
    def make_object(self, data, **kwargs):
        """Return a ``ScheduledJobInfo`` built from the loaded field values."""
        job_info = ScheduledJobInfo(**data)
        return job_info