コード例 #1
0
ファイル: test_constraints.py プロジェクト: hhy5277/dagster
def test_in_range_value_column_constraint():
    """Check InRangeColumnConstraint on a column without missing values.

    The range (1, 4) is expected to validate cleanly (``validate`` returns
    None), while the range (2, 3), which leaves the 1s outside it, is
    expected to raise ConstraintViolationException.
    """
    frame = DataFrame({'foo': [1, 1, 2, 3]})

    # Range that contains every value in the column.
    passing = InRangeColumnConstraint(1, 4, ignore_missing_vals=False)
    outcome = passing.validate(frame, 'foo')
    assert outcome is None

    # Range that excludes the 1s; construction stays inside the context
    # manager so an exception from either step is treated the same way.
    with pytest.raises(ConstraintViolationException):
        failing = InRangeColumnConstraint(2, 3, ignore_missing_vals=False)
        failing.validate(frame, 'foo')
コード例 #2
0
ファイル: test_constraints.py プロジェクト: hhy5277/dagster
def test_in_range_value_column_constraint_ignore_nan():
    """Check InRangeColumnConstraint with ``ignore_missing_vals=True``.

    Each entry of NAN_VALUES (defined elsewhere in the test module) is
    appended to the column in turn. The range (1, 4) is still expected to
    pass and the range (2, 3) is still expected to raise
    ConstraintViolationException.
    """
    for missing in NAN_VALUES:
        frame = DataFrame({'foo': [1, 1, 2, 3, missing]})

        lenient = InRangeColumnConstraint(1, 4, ignore_missing_vals=True)
        assert lenient.validate(frame, 'foo') is None

        with pytest.raises(ConstraintViolationException):
            narrow = InRangeColumnConstraint(2, 3, ignore_missing_vals=True)
            narrow.validate(frame, 'foo')
コード例 #3
0
ファイル: validation.py プロジェクト: sarahmk125/dagster
    def datetime_column(
        name,
        min_datetime=Timestamp.min,
        max_datetime=Timestamp.max,
        non_nullable=False,
        unique=False,
        ignore_missing_vals=False,
        is_required=None,
        tz=None,
    ):
        """
        Simple constructor for PandasColumns that expresses datetime constraints on 'datetime64[ns]'
        dtypes (or 'datetime64[ns, <tz>]' dtypes when ``tz`` is given).

        Args:
            name (str): Name of the column. This must match up with the column name in the dataframe you
                expect to receive.
            min_datetime (Optional[Union[pandas.Timestamp, datetime.datetime, str]]): The lower bound for
                values you expect in this column. Defaults to pandas.Timestamp.min.
            max_datetime (Optional[Union[pandas.Timestamp, datetime.datetime, str]]): The upper bound for
                values you expect in this column. Defaults to pandas.Timestamp.max.
            non_nullable (Optional[bool]): If true, this column will enforce a constraint that all values in the column
                ought to be non null values.
            unique (Optional[bool]): If true, this column will enforce a uniqueness constraint on the column values.
            ignore_missing_vals (Optional[bool]): A flag that is passed into most constraints. If true, the constraint will
                only evaluate non-null data. Ignore_missing_vals and non_nullable cannot both be True.
            is_required (Optional[bool]): Flag indicating the optional/required presence of the column.
                If the column exists the validate function will validate the column. Default to True.
            tz (Optional[str]): Required timezone for values eg: tz='UTC', tz='Europe/Dublin', tz='US/Eastern'.
                Defaults to None, meaning naive datetime values. When set, bounds that carry no timezone
                are localized to ``tz`` before being used in the range constraint.

        Returns:
            PandasColumn: A column carrying a dtype constraint, an in-range constraint and whatever
            ``_construct_keyword_constraints`` adds for ``non_nullable`` / ``unique``.
        """
        if tz is None:
            datetime_constraint = ColumnDTypeInSetConstraint({"datetime64[ns]"})
        else:
            datetime_constraint = ColumnDTypeInSetConstraint({f"datetime64[ns, {tz}]"})
            # One day more/less than absolute min/max to prevent OutOfBoundsDatetime errors when converting min/max to be tz aware
            # The bounds are wrapped in Timestamp() first (as the localization step below already does)
            # so that string or datetime.datetime bounds are accepted here instead of failing on `.replace`.
            if Timestamp(min_datetime).replace(tzinfo=None) == Timestamp.min:
                min_datetime = Timestamp("1677-09-22 00:12:43.145225Z")
            if Timestamp(max_datetime).replace(tzinfo=None) == Timestamp.max:
                max_datetime = Timestamp("2262-04-10 23:47:16.854775807Z")
            # Convert bounds to same tz
            if Timestamp(min_datetime).tz is None:
                min_datetime = Timestamp(min_datetime).tz_localize(tz)
            if Timestamp(max_datetime).tz is None:
                max_datetime = Timestamp(max_datetime).tz_localize(tz)

        return PandasColumn(
            name=check.str_param(name, "name"),
            constraints=[
                datetime_constraint,
                InRangeColumnConstraint(
                    min_datetime, max_datetime, ignore_missing_vals=ignore_missing_vals
                ),
            ]
            + _construct_keyword_constraints(
                non_nullable=non_nullable, unique=unique, ignore_missing_vals=ignore_missing_vals
            ),
            is_required=is_required,
        )
コード例 #4
0
ファイル: validation.py プロジェクト: zkan/dagster
 def datetime_column(
     name,
     min_datetime=Timestamp.min,
     max_datetime=Timestamp.max,
     non_nullable=False,
     unique=False,
 ):
     """Build a PandasColumn for a 'datetime64[ns]' column.

     Args:
         name (str): Column name; checked with ``check.str_param``.
         min_datetime: Lower bound handed to InRangeColumnConstraint.
             Defaults to pandas.Timestamp.min.
         max_datetime: Upper bound handed to InRangeColumnConstraint.
             Defaults to pandas.Timestamp.max.
         non_nullable (bool): Forwarded to ``_construct_keyword_constraints``.
         unique (bool): Forwarded to ``_construct_keyword_constraints``.

     Returns:
         PandasColumn: Column with the dtype constraint, the range constraint
         and the keyword-derived constraints, in that order.
     """
     column_name = check.str_param(name, 'name')
     dtype_constraint = ColumnTypeConstraint({'datetime64[ns]'})
     range_constraint = InRangeColumnConstraint(min_datetime, max_datetime)
     keyword_constraints = _construct_keyword_constraints(
         non_nullable=non_nullable,
         unique=unique,
     )
     return PandasColumn(
         name=column_name,
         constraints=[dtype_constraint, range_constraint] + keyword_constraints,
     )
コード例 #5
0
ファイル: test_data_frame.py プロジェクト: sd2k/dagster
def test_dataframe_description_generation_multi_constraints():
    """Check the generated description of a dataframe type with one multi-constraint column.

    The column 'foo' carries a dtype constraint, a range constraint and a
    non-null constraint; the type's ``description`` must equal the expected
    markdown text exactly.
    """
    foo_column = PandasColumn(
        name="foo",
        constraints=[
            ColumnDTypeInSetConstraint({"int64"}),
            InRangeColumnConstraint(0, 100, ignore_missing_vals=False),
            NonNullableColumnConstraint(),
        ],
    )
    TestDataFrame = create_dagster_pandas_dataframe_type(
        name="TestDataFrame",
        columns=[foo_column],
    )

    expected_description = (
        "\n### Columns\n**foo**: `int64`\n+ 0 < values < 100\n+ No Null values allowed.\n\n"
    )
    assert TestDataFrame.description == expected_description
コード例 #6
0
def test_dataframe_description_generation_multi_constraints():
    """Check the generated description of a dataframe type with one multi-constraint column.

    The column 'foo' carries a type constraint, a range constraint and a
    non-null constraint; the type's ``description`` must equal the expected
    markdown text exactly.
    """
    foo_column = PandasColumn(
        name='foo',
        constraints=[
            ColumnTypeConstraint('int64'),
            InRangeColumnConstraint(0, 100),
            NonNullableColumnConstraint(),
        ],
    )
    TestDataFrame = create_dagster_pandas_dataframe_type(
        name='TestDataFrame',
        columns=[foo_column],
    )

    expected_description = (
        "\n### Columns\n**foo**: `int64`\n+ 0 < values < 100\n+ No Null values allowed.\n\n"
    )
    assert TestDataFrame.description == expected_description
コード例 #7
0
ファイル: validation.py プロジェクト: zkan/dagster
 def numeric_column(
         name,
         expected_dtypes,
         min_value=-float('inf'),
         max_value=float('inf'),
         non_nullable=False,
         unique=False,
 ):
     """Build a PandasColumn for a numeric column with a value range.

     Args:
         name (str): Column name; checked with ``check.str_param``.
         expected_dtypes: Passed unchanged to ColumnTypeConstraint.
         min_value: Lower bound; checked with ``check.numeric_param``.
             Defaults to -float('inf').
         max_value: Upper bound; checked with ``check.numeric_param``.
             Defaults to float('inf').
         non_nullable (bool): Forwarded to ``_construct_keyword_constraints``.
         unique (bool): Forwarded to ``_construct_keyword_constraints``.

     Returns:
         PandasColumn: Column with the dtype constraint, the range constraint
         and the keyword-derived constraints, in that order.
     """
     column_name = check.str_param(name, 'name')
     dtype_constraint = ColumnTypeConstraint(expected_dtypes)
     lower_bound = check.numeric_param(min_value, 'min_value')
     upper_bound = check.numeric_param(max_value, 'max_value')
     range_constraint = InRangeColumnConstraint(lower_bound, upper_bound)
     keyword_constraints = _construct_keyword_constraints(
         non_nullable=non_nullable,
         unique=unique,
     )
     return PandasColumn(
         name=column_name,
         constraints=[dtype_constraint, range_constraint] + keyword_constraints,
     )
コード例 #8
0
 def datetime_column(
     cls,
     name,
     min_datetime=Timestamp.min,
     max_datetime=Timestamp.max,
     exists=False,
     unique=False,
 ):
     """Build an instance of ``cls`` describing a 'datetime64[ns]' column.

     Args:
         name (str): Column name; checked with ``check.str_param``.
         min_datetime: Lower bound handed to InRangeColumnConstraint.
             Defaults to pandas.Timestamp.min.
         max_datetime: Upper bound handed to InRangeColumnConstraint.
             Defaults to pandas.Timestamp.max.
         exists (bool): Forwarded to ``cls.add_configurable_constraints``.
         unique (bool): Forwarded to ``cls.add_configurable_constraints``.

     Returns:
         An instance of ``cls`` whose constraints are whatever
         ``cls.add_configurable_constraints`` returns for the base dtype and
         range constraints.
     """
     column_name = check.str_param(name, 'name')
     base_constraints = [
         ColumnTypeConstraint({'datetime64[ns]'}),
         InRangeColumnConstraint(min_datetime, max_datetime),
     ]
     all_constraints = cls.add_configurable_constraints(
         base_constraints,
         exists=exists,
         unique=unique,
     )
     return cls(name=column_name, constraints=all_constraints)
コード例 #9
0
    def datetime_column(
        name,
        min_datetime=Timestamp.min,
        max_datetime=Timestamp.max,
        non_nullable=False,
        unique=False,
        ignore_missing_vals=False,
        is_required=None,
    ):
        """
        Convenience constructor for a PandasColumn holding 'datetime64[ns]' values within a range.

        Args:
            name (str): Name of the column; it must match the column name in the dataframe you
                expect to receive.
            min_datetime: Lower bound for the values expected in this column (a datetime-like
                value). Defaults to pandas.Timestamp.min.
            max_datetime: Upper bound for the values expected in this column (a datetime-like
                value). Defaults to pandas.Timestamp.max.
            non_nullable (Optional[bool]): If true, the column enforces that all of its values are
                non-null.
            unique (Optional[bool]): If true, the column enforces uniqueness of its values.
            ignore_missing_vals (Optional[bool]): Flag passed into most constraints. If true, the
                constraint only evaluates non-null data. Ignore_missing_vals and non_nullable
                cannot both be True.
            is_required (Optional[bool]): Flag indicating the optional/required presence of the column.
                If the column exists the validate function will validate the column. Default to True.
        """
        column_name = check.str_param(name, "name")
        dtype_constraint = ColumnDTypeInSetConstraint({"datetime64[ns]"})
        range_constraint = InRangeColumnConstraint(
            min_datetime, max_datetime, ignore_missing_vals=ignore_missing_vals
        )
        keyword_constraints = _construct_keyword_constraints(
            non_nullable=non_nullable,
            unique=unique,
            ignore_missing_vals=ignore_missing_vals,
        )
        return PandasColumn(
            name=column_name,
            constraints=[dtype_constraint, range_constraint] + keyword_constraints,
            is_required=is_required,
        )
コード例 #10
0
ファイル: validation.py プロジェクト: keyz/dagster
    def float_column(
        name,
        min_value=-float("inf"),
        max_value=float("inf"),
        non_nullable=False,
        unique=False,
        ignore_missing_vals=False,
        is_required=None,
    ):
        """
        Convenience constructor for a PandasColumn holding float values within a numeric range.

        Args:
            name (str): Name of the column; it must match the column name in the dataframe you
                expect to receive.
            min_value (Optional[Union[int,float]]): Lower bound for the values expected in this
                column. Defaults to -float('inf').
            max_value (Optional[Union[int,float]]): Upper bound for the values expected in this
                column. Defaults to float('inf').
            non_nullable (Optional[bool]): If true, the column enforces that all of its values are
                non-null.
            unique (Optional[bool]): If true, the column enforces uniqueness of its values.
            ignore_missing_vals (Optional[bool]): Flag passed into most constraints. If true, the
                constraint only evaluates non-null data. Ignore_missing_vals and non_nullable
                cannot both be True.
            is_required (Optional[bool]): Flag indicating the optional/required presence of the column.
                If the column exists the validate function will validate the column. Default to True.
        """
        column_name = check.str_param(name, "name")
        dtype_constraint = ColumnDTypeFnConstraint(is_float_dtype)
        lower_bound = check.numeric_param(min_value, "min_value")
        upper_bound = check.numeric_param(max_value, "max_value")
        range_constraint = InRangeColumnConstraint(
            lower_bound, upper_bound, ignore_missing_vals=ignore_missing_vals
        )
        keyword_constraints = _construct_keyword_constraints(
            non_nullable=non_nullable,
            unique=unique,
            ignore_missing_vals=ignore_missing_vals,
        )
        return PandasColumn(
            name=column_name,
            constraints=[dtype_constraint, range_constraint] + keyword_constraints,
            is_required=is_required,
        )
コード例 #11
0
 def numeric_column(
         cls,
         name,
         expected_dtypes,
         min_value=-float('inf'),
         max_value=float('inf'),
         exists=False,
         unique=False,
 ):
     """Build an instance of ``cls`` describing a numeric column with a value range.

     Args:
         name (str): Column name; checked with ``check.str_param``.
         expected_dtypes: Passed unchanged to ColumnTypeConstraint.
         min_value: Lower bound; checked with ``check.numeric_param``.
             Defaults to -float('inf').
         max_value: Upper bound; checked with ``check.numeric_param``.
             Defaults to float('inf').
         exists (bool): Forwarded to ``cls.add_configurable_constraints``.
         unique (bool): Forwarded to ``cls.add_configurable_constraints``.

     Returns:
         An instance of ``cls`` whose constraints are whatever
         ``cls.add_configurable_constraints`` returns for the base dtype and
         range constraints.
     """
     column_name = check.str_param(name, 'name')
     dtype_constraint = ColumnTypeConstraint(expected_dtypes)
     lower_bound = check.numeric_param(min_value, 'min_value')
     upper_bound = check.numeric_param(max_value, 'max_value')
     base_constraints = [
         dtype_constraint,
         InRangeColumnConstraint(lower_bound, upper_bound),
     ]
     all_constraints = cls.add_configurable_constraints(
         base_constraints,
         exists=exists,
         unique=unique,
     )
     return cls(name=column_name, constraints=all_constraints)
コード例 #12
0
ファイル: test_constraints.py プロジェクト: zkan/dagster
def test_in_range_value_column_constraint():
    """Check InRangeColumnConstraint on a column without missing values.

    The range (1, 4) is expected to validate cleanly (``validate`` returns
    None), while the range (2, 3), which leaves the 1s outside it, is
    expected to raise ConstraintViolationException.
    """
    test_dataframe = DataFrame({'foo': [1, 1, 2, 3]})
    assert InRangeColumnConstraint(1, 4).validate(test_dataframe,
                                                  'foo') is None
    with pytest.raises(ConstraintViolationException):
        # No `assert` here: the call is expected to raise. Asserting on its
        # return value would turn a missing exception into an unrelated
        # AssertionError instead of pytest's "DID NOT RAISE" failure.
        InRangeColumnConstraint(2, 3).validate(test_dataframe, 'foo')