def test_datetime_column_with_min_max_constraints_ok():
    """Check that UTC datetime columns satisfy their min/max datetime bounds.

    Each constrained column holds the same ``Z``-suffixed timestamp. The three
    constraints differ only in how the bounds are written: the
    ``Timestamp.min``/``Timestamp.max`` extremes, bounds carrying a ``Z``
    suffix, and a lower bound created with ``tz="US/Eastern"``. The test
    passes when ``validate_constraints`` returns ``None``.
    """
    utc_sample = "2021-03-14T12:34:56Z"
    frame = DataFrame(
        {
            # No constraint in the list below targets this column.
            "datetime": [Timestamp("2021-03-14T12:34:56")],
            "datetime_utc_min_max_no_tz": [Timestamp(utc_sample)],
            "datetime_utc_min_max_same_tz": [Timestamp(utc_sample)],
            "datetime_utc_min_max_from_different_tz": [Timestamp(utc_sample)],
        }
    )

    # Bounds taken from the extremes exposed by Timestamp itself.
    extreme_bounds = PandasColumn.datetime_column(
        "datetime_utc_min_max_no_tz",
        tz="UTC",
        min_datetime=Timestamp.min,
        max_datetime=Timestamp.max,
    )
    # Bounds written with the same "Z" suffix as the column data.
    utc_bounds = PandasColumn.datetime_column(
        "datetime_utc_min_max_same_tz",
        tz="UTC",
        min_datetime=Timestamp("2021-01-01T00:00:00Z"),
        max_datetime=Timestamp("2021-12-01T00:00:00Z"),
    )
    # Lower bound built with a different tz argument than the column's tz.
    mixed_tz_bounds = PandasColumn.datetime_column(
        "datetime_utc_min_max_from_different_tz",
        tz="UTC",
        min_datetime=Timestamp("2021-01-01T00:00:00Z", tz="US/Eastern"),
        max_datetime=Timestamp("2021-12-01T00:00:00Z"),
    )

    outcome = validate_constraints(
        frame,
        pandas_columns=[extreme_bounds, utc_bounds, mixed_tz_bounds],
    )
    assert outcome is None
def test_datetime_column_with_tz_validation_fails_when_incorrect_tz():
    """Check that a ``tz="UTC"`` constraint rejects a timestamp written without a zone.

    The only column holds ``Timestamp("2021-03-14T12:34:56")`` (no ``Z``
    suffix, no ``tz`` argument), so validating it against a UTC column
    constraint must raise ``ConstraintViolationException``.
    """
    frame_without_tz = DataFrame({"datetime_utc": [Timestamp("2021-03-14T12:34:56")]})
    utc_constraint = PandasColumn.datetime_column("datetime_utc", tz="UTC")

    # Only the validation call sits inside the raises block.
    with pytest.raises(ConstraintViolationException):
        validate_constraints(frame_without_tz, pandas_columns=[utc_constraint])
def test_datetime_column_with_tz_validation_ok():
    """Check that datetime columns validate when data and constraint agree on tz.

    Covers one column declared without a ``tz`` argument plus four columns
    whose constraint names the zone used to build the data (UTC,
    Europe/Dublin, US/Eastern, Pacific/Chatham). The test passes when
    ``validate_constraints`` returns ``None``.
    """
    wall_clock = "2021-03-14T12:34:56"
    utc_stamp = "2021-03-14T12:34:56Z"
    frame = DataFrame(
        {
            "datetime": [Timestamp(wall_clock)],
            "datetime_utc": [Timestamp(utc_stamp)],
            "datetime_dublin": [Timestamp(wall_clock, tz="Europe/Dublin")],
            "datetime_est": [Timestamp(wall_clock, tz="US/Eastern")],
            "datetime_chatham": [Timestamp(wall_clock, tz="Pacific/Chatham")],
            # Present in the frame, but no constraint below refers to it.
            "datetime_utc_with_min_max": [Timestamp(utc_stamp)],
        }
    )

    # (column name, tz passed to the constraint), in declaration order.
    zoned_columns = (
        ("datetime_utc", "UTC"),
        ("datetime_dublin", "Europe/Dublin"),
        ("datetime_est", "US/Eastern"),
        ("datetime_chatham", "Pacific/Chatham"),
    )
    constraints = [PandasColumn.datetime_column("datetime")]
    constraints.extend(
        PandasColumn.datetime_column(column_name, tz=zone)
        for column_name, zone in zoned_columns
    )

    outcome = validate_constraints(frame, pandas_columns=constraints)
    assert outcome is None
Exemple #4
0
            "min_start_time",
            "Date data collection started",
        ),
        EventMetadataEntry.text(str(max(dataframe["end_time"])),
                                "max_end_time", "Timestamp of last trip"),
        EventMetadataEntry.text(str(len(dataframe)), "n_rows",
                                "Number of rows seen in the dataframe"),
        EventMetadataEntry.text(str(dataframe.columns), "columns",
                                "Keys of columns seen in the dataframe"),
    ]


# Column constraints for the trip dataframe, in order: a bike_id integer
# column with min_value=0, start_time and end_time datetime columns sharing
# a 2017-01-01 lower bound, and an interval_date string column.
TripDataFrameSchema = (
    [PandasColumn.integer_column("bike_id", min_value=0)]
    + [
        PandasColumn.datetime_column(
            time_column,
            min_datetime=Timestamp(year=2017, month=1, day=1),
        )
        for time_column in ("start_time", "end_time")
    ]
    + [PandasColumn.string_column("interval_date")]
)

# Dataframe type for raw trips: every schema column except "interval_date"
# is rebuilt from its name alone, so the min_value / min_datetime arguments
# given above are not passed along.
RawTripDataFrame = create_dagster_pandas_dataframe_type(
    name="RawTripDataFrame",
    columns=[
        PandasColumn(schema_column.name)
        for schema_column in TripDataFrameSchema
        if schema_column.name != "interval_date"
    ],
)
Exemple #5
0
            'min_start_time',
            'Date data collection started',
        ),
        EventMetadataEntry.text(str(max(dataframe['end_time'])),
                                'max_end_time', 'Timestamp of last trip'),
        EventMetadataEntry.text(str(len(dataframe)), 'n_rows',
                                'Number of rows seen in the dataframe'),
        EventMetadataEntry.text(str(dataframe.columns), 'columns',
                                'Keys of columns seen in the dataframe'),
    ]


# Column constraints for the trip dataframe, in order: a bike_id integer
# column with min_value=0, start_time and end_time datetime columns sharing
# a 2018-01-01 lower bound, and an interval_date string column.
TripDataFrameSchema = (
    [PandasColumn.integer_column('bike_id', min_value=0)]
    + [
        PandasColumn.datetime_column(
            time_column,
            min_datetime=Timestamp(year=2018, month=1, day=1),
        )
        for time_column in ('start_time', 'end_time')
    ]
    + [PandasColumn.string_column('interval_date')]
)

# Dataframe type for raw trips: every schema column except 'interval_date'
# is rebuilt from its name alone, so the min_value / min_datetime arguments
# given above are not passed along.
RawTripDataFrame = create_dagster_pandas_dataframe_type(
    name='RawTripDataFrame',
    columns=[
        PandasColumn(schema_column.name)
        for schema_column in TripDataFrameSchema
        if schema_column.name != 'interval_date'
    ],
)