def test_datetime_column_with_min_max_constraints_ok():
    """Check that in-range UTC timestamps satisfy min/max datetime constraints.

    Three tz="UTC" column constraints are exercised, each against a column
    holding the single value ``2021-03-14T12:34:56Z``:

    * bounds given as ``Timestamp.min`` / ``Timestamp.max``;
    * bounds given as ``Z``-suffixed timestamps;
    * a lower bound constructed with ``tz="US/Eastern"`` next to a
      ``Z``-suffixed upper bound.

    The frame also carries a plain "datetime" column for which no constraint
    is declared. The test passes when ``validate_constraints`` returns None.
    """
    frame = DataFrame(
        {
            "datetime": [Timestamp("2021-03-14T12:34:56")],
            "datetime_utc_min_max_no_tz": [Timestamp("2021-03-14T12:34:56Z")],
            "datetime_utc_min_max_same_tz": [Timestamp("2021-03-14T12:34:56Z")],
            "datetime_utc_min_max_from_different_tz": [Timestamp("2021-03-14T12:34:56Z")],
        }
    )

    # Bounds taken from the Timestamp.min / Timestamp.max class attributes.
    extreme_bounds = PandasColumn.datetime_column(
        "datetime_utc_min_max_no_tz",
        tz="UTC",
        min_datetime=Timestamp.min,
        max_datetime=Timestamp.max,
    )

    # Both bounds written with a trailing "Z", like the column's tz.
    matching_tz_bounds = PandasColumn.datetime_column(
        "datetime_utc_min_max_same_tz",
        tz="UTC",
        min_datetime=Timestamp("2021-01-01T00:00:00Z"),
        max_datetime=Timestamp("2021-12-01T00:00:00Z"),
    )

    # Lower bound built with an explicit US/Eastern tz argument.
    mixed_tz_bounds = PandasColumn.datetime_column(
        "datetime_utc_min_max_from_different_tz",
        tz="UTC",
        min_datetime=Timestamp("2021-01-01T00:00:00Z", tz="US/Eastern"),
        max_datetime=Timestamp("2021-12-01T00:00:00Z"),
    )

    outcome = validate_constraints(
        frame,
        pandas_columns=[extreme_bounds, matching_tz_bounds, mixed_tz_bounds],
    )
    assert outcome is None
def test_datetime_column_with_tz_validation_fails_when_incorrect_tz():
    """Expect a ConstraintViolationException for a tz="UTC" column mismatch.

    The "datetime_utc" column holds a timestamp created from a string with no
    offset and without a tz argument, while the column constraint asks for
    tz="UTC".
    """
    with pytest.raises(ConstraintViolationException):
        # Everything is built inside the context manager so that the set of
        # statements allowed to raise stays the same as in the one-expression form.
        frame = DataFrame({"datetime_utc": [Timestamp("2021-03-14T12:34:56")]})
        utc_only = PandasColumn.datetime_column("datetime_utc", tz="UTC")
        validate_constraints(frame, pandas_columns=[utc_only])
def test_datetime_column_with_tz_validation_ok():
    """Check that columns whose timestamps carry the declared tz validate.

    One constraint is declared without a tz for the "datetime" column, and one
    tz-specific constraint each for the UTC, Europe/Dublin, US/Eastern and
    Pacific/Chatham columns. The frame's "datetime_utc_with_min_max" column
    has no constraint declared for it. The test passes when
    ``validate_constraints`` returns None.
    """
    frame = DataFrame(
        {
            "datetime": [Timestamp("2021-03-14T12:34:56")],
            "datetime_utc": [Timestamp("2021-03-14T12:34:56Z")],
            "datetime_dublin": [Timestamp("2021-03-14T12:34:56", tz="Europe/Dublin")],
            "datetime_est": [Timestamp("2021-03-14T12:34:56", tz="US/Eastern")],
            "datetime_chatham": [Timestamp("2021-03-14T12:34:56", tz="Pacific/Chatham")],
            "datetime_utc_with_min_max": [Timestamp("2021-03-14T12:34:56Z")],
        }
    )

    # (column name, required tz) pairs, in the order the constraints are declared.
    tz_by_column = (
        ("datetime_utc", "UTC"),
        ("datetime_dublin", "Europe/Dublin"),
        ("datetime_est", "US/Eastern"),
        ("datetime_chatham", "Pacific/Chatham"),
    )

    # The first constraint is declared without any tz argument.
    constraints = [PandasColumn.datetime_column("datetime")]
    for column_name, zone in tz_by_column:
        constraints.append(PandasColumn.datetime_column(column_name, tz=zone))

    outcome = validate_constraints(frame, pandas_columns=constraints)
    assert outcome is None
"min_start_time", "Date data collection started", ), EventMetadataEntry.text(str(max(dataframe["end_time"])), "max_end_time", "Timestamp of last trip"), EventMetadataEntry.text(str(len(dataframe)), "n_rows", "Number of rows seen in the dataframe"), EventMetadataEntry.text(str(dataframe.columns), "columns", "Keys of columns seen in the dataframe"), ] TripDataFrameSchema = [ PandasColumn.integer_column("bike_id", min_value=0), PandasColumn.datetime_column( "start_time", min_datetime=Timestamp(year=2017, month=1, day=1), ), PandasColumn.datetime_column( "end_time", min_datetime=Timestamp(year=2017, month=1, day=1), ), PandasColumn.string_column("interval_date"), ] RawTripDataFrame = create_dagster_pandas_dataframe_type( name="RawTripDataFrame", columns=[ PandasColumn(column.name) for column in TripDataFrameSchema if column.name != "interval_date" ], )
'min_start_time', 'Date data collection started', ), EventMetadataEntry.text(str(max(dataframe['end_time'])), 'max_end_time', 'Timestamp of last trip'), EventMetadataEntry.text(str(len(dataframe)), 'n_rows', 'Number of rows seen in the dataframe'), EventMetadataEntry.text(str(dataframe.columns), 'columns', 'Keys of columns seen in the dataframe'), ] TripDataFrameSchema = [ PandasColumn.integer_column('bike_id', min_value=0), PandasColumn.datetime_column( 'start_time', min_datetime=Timestamp(year=2018, month=1, day=1), ), PandasColumn.datetime_column( 'end_time', min_datetime=Timestamp(year=2018, month=1, day=1), ), PandasColumn.string_column('interval_date'), ] RawTripDataFrame = create_dagster_pandas_dataframe_type( name='RawTripDataFrame', columns=[ PandasColumn(column.name) for column in TripDataFrameSchema if column.name != 'interval_date' ], )