예제 #1
0
from datetime import datetime

from dagster import OutputDefinition, pipeline, solid
from dagster.utils import script_relative_path
from dagster_pandas import RowCountConstraint, create_dagster_pandas_dataframe_type
from pandas import DataFrame, read_csv

# start_create_type
ShapeConstrainedTripDataFrame = create_dagster_pandas_dataframe_type(
    name="ShapeConstrainedTripDataFrame",
    dataframe_constraints=[RowCountConstraint(4)])
# end_create_type


@solid(output_defs=[
    OutputDefinition(name="shape_constrained_trip_dataframe",
                     dagster_type=ShapeConstrainedTripDataFrame)
])
def load_shape_constrained_trip_dataframe(_) -> DataFrame:
    return read_csv(
        script_relative_path("./ebike_trips.csv"),
        parse_dates=["start_time", "end_time"],
        date_parser=lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f"),
    )


@pipeline
def shape_constrained_pipeline():
    load_shape_constrained_trip_dataframe()
예제 #2
0
from datetime import datetime

from dagster import Out, job, op
from dagster.utils import script_relative_path
from dagster_pandas import RowCountConstraint, create_dagster_pandas_dataframe_type
from pandas import DataFrame, read_csv

# start_create_type
ShapeConstrainedTripDataFrame = create_dagster_pandas_dataframe_type(
    name="ShapeConstrainedTripDataFrame", dataframe_constraints=[RowCountConstraint(4)]
)
# end_create_type


@op(out=Out(ShapeConstrainedTripDataFrame))
def load_shape_constrained_trip_dataframe() -> DataFrame:
    return read_csv(
        script_relative_path("./ebike_trips.csv"),
        parse_dates=["start_time", "end_time"],
        date_parser=lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f"),
        dtype={"color": "category"},
    )


@job
def shape_constrained_trip():
    load_shape_constrained_trip_dataframe()