def test_make_dagster_type():
    SomeNamedTuple = collections.namedtuple("SomeNamedTuple", "prop")
    DagsterSomeNamedTuple = PythonObjectDagsterType(SomeNamedTuple)
    dagster_type = resolve_dagster_type(DagsterSomeNamedTuple)
    assert dagster_type.name == "SomeNamedTuple"
    assert SomeNamedTuple(prop="foo").prop == "foo"

    DagsterNewNameNamedTuple = PythonObjectDagsterType(SomeNamedTuple, name="OverwriteName")
    dagster_type = resolve_dagster_type(DagsterNewNameNamedTuple)
    assert dagster_type.name == "OverwriteName"
def test_even_type_loader():
    class EvenType:
        def __init__(self, num):
            assert num % 2 == 0
            self.num = num

    @dagster_type_loader(int)
    def load_even_type(_, cfg):
        return EvenType(cfg)

    EvenDagsterType = PythonObjectDagsterType(EvenType, loader=load_even_type)

    @solid
    def double_even(_, even_num: EvenDagsterType) -> EvenDagsterType:
        return EvenType(even_num.num * 2)

    yaml_doc = """
    solids:
        double_even:
            inputs:
                even_num: 2
    """
    assert execute_solid(double_even, run_config=yaml.safe_load(yaml_doc)).success

    assert execute_solid(
        double_even, run_config={"solids": {"double_even": {"inputs": {"even_num": 2}}}}
    ).success

    # Loading an odd number trips the assertion in EvenType's constructor
    with pytest.raises(AssertionError):
        execute_solid(
            double_even, run_config={"solids": {"double_even": {"inputs": {"even_num": 3}}}}
        )
def test_even_type_materialization_config():
    class EvenType:
        def __init__(self, num):
            assert num % 2 == 0
            self.num = num

    @dagster_type_materializer({"path": str})
    def save_to_file_materialization(_, cfg, value):
        path = cfg["path"]
        with open(path, "w") as ff:
            ff.write(str(value))
        return AssetMaterialization(
            "path", "Wrote out value to {path}".format(path=path), metadata={"path": path}
        )

    EvenDagsterType = PythonObjectDagsterType(EvenType, materializer=save_to_file_materialization)

    @solid
    def double_even(_, even_num: EvenDagsterType) -> EvenDagsterType:
        return EvenType(even_num.num * 2)

    with safe_tempfile_path() as path:
        yaml_doc = """
        solids:
            double_even:
                outputs:
                    - result:
                          path: {path}
        """
        solid_result = execute_solid(
            double_even,
            input_values={"even_num": EvenType(2)},
            run_config=yaml.safe_load(yaml_doc.format(path=path)),
        )
        assert solid_result.success
def test_even_type_hydration_config():
    class EvenType:
        def __init__(self, num):
            assert num % 2 == 0
            self.num = num

    @input_hydration_config(int)
    def hydrate_even_type(_, cfg):
        return EvenType(cfg)

    EvenDagsterType = PythonObjectDagsterType(EvenType, input_hydration_config=hydrate_even_type)

    @solid
    def double_even(_, even_num: EvenDagsterType) -> EvenDagsterType:
        return EvenType(even_num.num * 2)

    yaml_doc = '''
    solids:
        double_even:
            inputs:
                even_num: 2
    '''
    assert execute_solid(double_even, run_config=yaml.safe_load(yaml_doc)).success

    assert execute_solid(
        double_even, run_config={'solids': {'double_even': {'inputs': {'even_num': 2}}}}
    ).success

    # Loading an odd number trips the assertion in EvenType's constructor
    with pytest.raises(AssertionError):
        execute_solid(
            double_even, run_config={'solids': {'double_even': {'inputs': {'even_num': 3}}}}
        )
def test_make_usable_as_dagster_type():
    class EvenType:
        def __init__(self, num):
            assert num % 2 == 0
            self.num = num

    EvenDagsterType = PythonObjectDagsterType(EvenType, name="EvenDagsterType")

    make_python_type_usable_as_dagster_type(EvenType, EvenDagsterType)

    @solid
    def double_even(_, even_num: EvenType) -> EvenType:
        return EvenType(even_num.num * 2)

    assert execute_solid(double_even, input_values={"even_num": EvenType(2)}).success
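    # Hedged addition (not in the original test): with the mapping registered, resolving the
    # bare Python class should yield the registered Dagster type. ``resolve_dagster_type`` is
    # the same helper used in test_make_dagster_type above.
    assert resolve_dagster_type(EvenType).name == "EvenDagsterType"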
def test_mypy_compliance():
    class EvenType:
        def __init__(self, num):
            assert num % 2 == 0
            self.num = num

    if typing.TYPE_CHECKING:
        EvenDagsterType = EvenType
    else:
        EvenDagsterType = PythonObjectDagsterType(EvenType)

    @solid
    def double_even(_, even_num: EvenDagsterType) -> EvenDagsterType:
        return EvenType(even_num.num * 2)

    assert execute_solid(double_even, input_values={"even_num": EvenType(2)}).success
def test_python_object_dagster_type():
    class EvenType:
        def __init__(self, num):
            assert num % 2 == 0
            self.num = num

    EvenDagsterType = PythonObjectDagsterType(EvenType, name="EvenDagsterType")

    @solid
    def double_even(_, even_num: EvenDagsterType) -> EvenDagsterType:
        # These type annotations are a shorthand for constructing InputDefinitions
        # and OutputDefinitions, and are not mypy compliant
        return EvenType(even_num.num * 2)

    assert execute_solid(double_even, input_values={"even_num": EvenType(2)}).success

    # EvenType(3) trips the assertion in EvenType's constructor
    with pytest.raises(AssertionError):
        execute_solid(double_even, input_values={"even_num": EvenType(3)})
def test_validate_inputs():
    @root_input_manager
    def my_loader(_):
        return 5

    @solid(
        input_defs=[
            InputDefinition(
                "input1", dagster_type=PythonObjectDagsterType(int), root_manager_key="my_loader"
            )
        ]
    )
    def my_solid(_, input1):
        return input1

    @pipeline(mode_defs=[ModeDefinition(resource_defs={"my_loader": my_loader})])
    def my_pipeline():
        my_solid()

    execute_pipeline(my_pipeline)
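    # Hedged follow-up sketch (assumes the legacy ``result_for_solid``/``output_value`` result
    # APIs): the root input manager returns 5, which passes the PythonObjectDagsterType(int)
    # isinstance check, and my_solid passes the value through unchanged.
    result = execute_pipeline(my_pipeline)
    assert result.success
    assert result.result_for_solid("my_solid").output_value() == 5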
def test_even_type_materialization_config():
    class EvenType:
        def __init__(self, num):
            assert num % 2 == 0
            self.num = num

    @output_materialization_config({'path': str})
    def save_to_file_materialization(_, cfg, value):
        path = cfg['path']
        with open(path, 'w') as ff:
            ff.write(str(value))
        return Materialization(
            'path',
            'Wrote out value to {path}'.format(path=path),
            metadata_entries=[EventMetadataEntry.text('path', path)],
        )

    EvenDagsterType = PythonObjectDagsterType(
        EvenType, output_materialization_config=save_to_file_materialization
    )

    @solid
    def double_even(_, even_num: EvenDagsterType) -> EvenDagsterType:
        return EvenType(even_num.num * 2)

    with safe_tempfile_path() as path:
        yaml_doc = '''
        solids:
            double_even:
                outputs:
                    - result:
                          path: {path}
        '''
        solid_result = execute_solid(
            double_even,
            input_values={'even_num': EvenType(2)},
            run_config=yaml.safe_load(yaml_doc.format(path=path)),
        )
        assert solid_result.success
from dagster import PythonObjectDagsterType, solid

# start_object_type
class EvenType:
    def __init__(self, num):
        assert num % 2 == 0
        self.num = num


EvenDagsterType = PythonObjectDagsterType(EvenType, name="EvenDagsterType")
# end_object_type


# start_use_object_type
@solid
def double_even(even_num: EvenDagsterType) -> EvenDagsterType:
    return EvenType(even_num.num * 2)


# end_use_object_type
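# A hedged usage sketch, not part of the docs snippet above: ``execute_solid`` is the legacy
# single-solid test helper, and ``DagsterTypeCheckDidNotPass`` is the error raised when an
# isinstance-based type check fails with raise_on_error left at its default.
import pytest
from dagster import DagsterTypeCheckDidNotPass, execute_solid


def test_even_dagster_type_check():
    assert execute_solid(double_even, input_values={"even_num": EvenType(2)}).success
    # A bare int is not an EvenType instance, so the input type check fails.
    with pytest.raises(DagsterTypeCheckDidNotPass):
        execute_solid(double_even, input_values={"even_num": 4})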
]


@dagster_type_materializer(String)
def df_output_schema(_context, path, value):
    with open(path, "w") as fd:
        writer = csv.DictWriter(fd, fieldnames=value[0].keys())
        writer.writeheader()
        writer.writerows(rowdicts=value)

    return AssetMaterialization.file(path)


PoorMansDataFrame = PythonObjectDagsterType(
    python_type=list,
    name="PoorMansDataFrame",
    loader=df_input_schema,
    materializer=df_output_schema,
)


@contextmanager
def define_test_out_of_process_context(instance):
    check.inst_param(instance, "instance", DagsterInstance)
    with define_out_of_process_context(__file__, main_repo_name(), instance) as context:
        yield context


def create_main_recon_repo():
    return ReconstructableRepository.for_file(__file__, main_repo_name())
from dagster import (
    Bool,
    Field,
    Int,
    PythonObjectDagsterType,
    String,
    composite_solid,
    execute_pipeline,
    pipeline,
    solid,
)

if typing.TYPE_CHECKING:
    DataFrame = list
else:
    DataFrame = PythonObjectDagsterType(list, name="DataFrame")  # type: Any


@solid(
    config_schema={
        "delimiter": Field(
            String,
            default_value=",",
            is_required=False,
            description=("A one-character string used to separate fields."),
        ),
        "doublequote": Field(
            Bool,
            default_value=False,
class SparkDataFrameFilesystemStoragePlugin(TypeStoragePlugin):  # pylint: disable=no-init
    @classmethod
    def compatible_with_storage_def(cls, system_storage_def):
        return system_storage_def is fs_system_storage

    @classmethod
    def set_object(cls, intermediate_store, obj, _context, _dagster_type, paths):
        target_path = os.path.join(intermediate_store.root, *paths)
        obj.write.parquet(intermediate_store.uri_for_paths(paths))
        return target_path

    @classmethod
    def get_object(cls, intermediate_store, context, _dagster_type, paths):
        return context.resources.pyspark.spark_session.read.parquet(
            os.path.join(intermediate_store.root, *paths)
        )

    @classmethod
    def required_resource_keys(cls):
        return frozenset({'pyspark'})


DataFrame = PythonObjectDagsterType(
    python_type=NativeSparkDataFrame,
    name='PySparkDataFrame',
    description='A PySpark data frame.',
    auto_plugins=[SparkDataFrameS3StoragePlugin, SparkDataFrameFilesystemStoragePlugin],
    output_materialization_config=spark_df_output_schema,
)
"""Type definitions for the airline_demo.""" from collections import namedtuple import sqlalchemy from dagster import PythonObjectDagsterType from dagster.core.types.dagster_type import create_string_type AirlineDemoResources = namedtuple( 'AirlineDemoResources', ('spark', 's3', 'db_url', 'db_engine', 'db_dialect', 'redshift_s3_temp_dir', 'db_load'), ) SqlAlchemyEngineType = PythonObjectDagsterType( sqlalchemy.engine.Connectable, name='SqlAlchemyEngineType', description='A SqlAlchemy Connectable', ) SqlTableName = create_string_type('SqlTableName', description='The name of a database table')
from typing import Any, List, Optional, TYPE_CHECKING

from azmeta.access.specifications import AzureComputeSpecifications, load_compute_specifications
from dagster import (
    Array,
    Field,
    PythonObjectDagsterType,
    SolidExecutionContext,
    String,
    make_python_type_usable_as_dagster_type,
    solid,
)

AzureComputeSpecificationsDagsterType = PythonObjectDagsterType(AzureComputeSpecifications)
make_python_type_usable_as_dagster_type(
    AzureComputeSpecifications, AzureComputeSpecificationsDagsterType
)


@solid(
    config_schema={
        'subscription': Field(
            String, is_required=False, description='The subscription ID to list SKUs from.'
        )
    }
)
def load_compute_specs(context: SolidExecutionContext) -> AzureComputeSpecifications:
    return load_compute_specifications(logger=context.log)
PositiveNumber = DagsterType(
    name="PositiveNumber",
    description="Only take in numbers greater than zero",
    type_check_fn=positive_num_check,
    loader=positive_num_loader,
)


# How to use PythonObjectDagsterType
class PercentType:
    def __init__(self, number):
        self.value = number * 100


PercentDagsterType = PythonObjectDagsterType(PercentType, name="PercentDagsterType")


# Explicit input/output definitions are used here because annotating parameters with
# DagsterType instances is not mypy compliant; only plain Python types are.
@solid(
    input_defs=[
        InputDefinition("num1", PositiveNumber),
        InputDefinition("num2", PositiveNumber),
    ],
    output_defs=[OutputDefinition(PercentDagsterType)],
)
def add_two_nums(_context, num1, num2):
    adding = num1 + num2  # e.g. 2 + 3 => 5
    add_percent_type = PercentType(adding)
    yield ExpectationResult(
def create_lakehouse_table_def(
    name,
    lakehouse_fn,
    input_tables=None,
    other_input_defs=None,
    required_resource_keys=None,
    tags=None,
    description=None,
):
    input_tables = check.opt_list_param(
        input_tables, 'input_tables', of_type=LakehouseTableInputDefinition
    )
    other_input_defs = check.opt_list_param(
        other_input_defs, 'other_input_defs', of_type=InputDefinition
    )
    required_resource_keys = check.opt_set_param(
        required_resource_keys, 'required_resource_keys', of_type=str
    )

    table_type = PythonObjectDagsterType(
        python_type=ITableHandle, name=name, description=description
    )

    table_input_dict = {input_table.name: input_table for input_table in input_tables}
    input_defs = input_tables + other_input_defs
    validate_solid_fn('@solid', name, lakehouse_fn, input_defs, ['context'])

    def _compute(context, inputs):
        '''Workhorse function of lakehouse.

        The inputs are objects that inherit from ITableHandle. This compute_fn:

        (1) Iterates over the input tables and asks the lakehouse resource to hydrate their
            contents, or a representation of their contents (e.g. a pyspark dataframe), into
            memory for computation.
        (2) Passes those values into the lakehouse table function, which does the actual work.
        (3) Passes the output of the lakehouse function to the lakehouse materialize function.
        (4) Yields a materialization if the lakehouse function returned one.

        There is an argument that the hydrate and materialize functions should return a stream
        of events, but that started to feel like reimplementing what should be a framework
        feature.
        '''
        check.inst_param(context.resources.lakehouse, 'context.resources.lakehouse', Lakehouse)

        # hydrate tables
        hydrated_tables = {}
        other_inputs = {}
        for input_name, value in inputs.items():
            context.log.info(
                'About to hydrate table {input_name} for use in {name}'.format(
                    input_name=input_name, name=name
                )
            )
            if input_name in table_input_dict:
                table_handle = value
                input_type = table_input_dict[input_name].runtime_type
                hydrated_tables[input_name] = context.resources.lakehouse.hydrate(
                    context,
                    input_type,
                    table_def_of_type(context.pipeline_def, input_type.name).tags,
                    table_handle,
                    tags,
                )
            else:
                other_inputs[input_name] = value

        # call user-provided business logic which operates on the hydrated values
        # (as opposed to the handles)
        computed_output = lakehouse_fn(context, **hydrated_tables, **other_inputs)

        materialization, output_table_handle = context.resources.lakehouse.materialize(
            context, table_type, tags, computed_output
        )

        if materialization:
            yield materialization

        # just pass in a dummy handle for now if the materialize function
        # does not return one
        yield Output(output_table_handle if output_table_handle else TableHandle())

    required_resource_keys.add('lakehouse')

    return LakehouseTableDefinition(
        lakehouse_fn=lakehouse_fn,
        name=name,
        input_tables=input_tables,
        input_defs=input_defs,
        output_defs=[OutputDefinition(table_type)],
        compute_fn=_compute,
        required_resource_keys=required_resource_keys,
        tags=tags,
        description=description,
    )
    def compatible_with_storage_def(cls, system_storage_def):
        return (
            system_storage_def is fs_system_storage
            or system_storage_def is fs_intermediate_storage
        )

    @classmethod
    def set_object(cls, intermediate_store, obj, _context, _dagster_type, paths):
        target_path = os.path.join(intermediate_store.root, *paths)
        obj.write.parquet(intermediate_store.uri_for_paths(paths))
        return target_path

    @classmethod
    def get_object(cls, intermediate_store, context, _dagster_type, paths):
        return context.resources.pyspark.spark_session.read.parquet(
            os.path.join(intermediate_store.root, *paths)
        )

    @classmethod
    def required_resource_keys(cls):
        return frozenset({'pyspark'})


DataFrame = PythonObjectDagsterType(
    python_type=NativeSparkDataFrame,
    name='PySparkDataFrame',
    description='A PySpark data frame.',
    auto_plugins=[SparkDataFrameS3StoragePlugin, SparkDataFrameFilesystemStoragePlugin],
    materializer=spark_df_materializer,
)
from dagster import (
    Bool,
    Field,
    Int,
    PythonObjectDagsterType,
    String,
    composite_solid,
    execute_pipeline,
    pipeline,
    solid,
)

if typing.TYPE_CHECKING:
    DataFrame = list
else:
    DataFrame = PythonObjectDagsterType(list, name='DataFrame')  # type: Any


@solid(
    config={
        'delimiter': Field(
            String,
            default_value=',',
            is_required=False,
            description=('A one-character string used to separate fields.'),
        ),
        'doublequote': Field(
            Bool,
            default_value=False,
if file_type == "csv": return spark_read.csv(path, **dict_without_keys(file_options, "path")) elif file_type == "parquet": return spark_read.parquet(path, **dict_without_keys(file_options, "path")) elif file_type == "json": return spark_read.json(path, **dict_without_keys(file_options, "path")) elif file_type == "jdbc": return spark_read.jdbc(**file_options) elif file_type == "orc": return spark_read.orc(path, **dict_without_keys(file_options, "path")) elif file_type == "table": return spark_read.table(**file_options) elif file_type == "text": return spark_read.text(path, **dict_without_keys(file_options, "path")) elif file_type == "other": return spark_read.load(**file_options) else: raise DagsterInvariantViolationError( "Unsupported file_type {file_type}".format(file_type=file_type)) DataFrame = PythonObjectDagsterType( python_type=NativeSparkDataFrame, name="PySparkDataFrame", description="A PySpark data frame.", loader=dataframe_loader, materializer=dataframe_materializer, )
]


@output_materialization_config(String)
def df_output_schema(_context, path, value):
    with open(path, 'w') as fd:
        writer = csv.DictWriter(fd, fieldnames=value[0].keys())
        writer.writeheader()
        writer.writerows(rowdicts=value)

    return Materialization.file(path)


PoorMansDataFrame = PythonObjectDagsterType(
    python_type=list,
    name='PoorMansDataFrame',
    input_hydration_config=df_input_schema,
    output_materialization_config=df_output_schema,
)


def define_test_subprocess_context(instance):
    check.inst_param(instance, 'instance', DagsterInstance)
    return define_subprocess_context_for_file(__file__, "test_repo", instance)


def define_test_context(instance):
    check.inst_param(instance, 'instance', DagsterInstance)
    return define_context_for_file(__file__, "test_repo", instance)


def create_main_recon_repo():
    required_resource_keys={'spark'},
)
def write_rdd(context, file_type, file_options, spark_rdd):
    if file_type == 'csv':
        df = context.resources.spark.spark_session.createDataFrame(spark_rdd)
        context.log.info('DF: {}'.format(df))
        df.write.csv(
            file_options['path'], header=file_options.get('header'), sep=file_options.get('sep')
        )
    else:
        check.failed('Unsupported file type: {}'.format(file_type))


SparkRDD = PythonObjectDagsterType(
    python_type=RDD,
    name='SparkRDD',
    input_hydration_config=load_rdd,
    output_materialization_config=write_rdd,
)


@output_selector_schema(
    Selector(
        {
            'csv': {
                'path': Field(Path),
                'sep': Field(String, is_required=False),
                'header': Field(Bool, is_required=False),
            },
        }
    )
)
def spark_df_output_schema(_context, file_type, file_options, spark_df):
    if file_type == 'csv':
        spark_df.write.csv(
            file_options['path'],
        target_path = os.path.join(intermediate_storage.root, *paths)
        value.write.parquet(intermediate_storage.uri_for_paths(paths))
        return target_path

    @classmethod
    def get_intermediate_object(
        cls, intermediate_storage, context, _dagster_type, step_output_handle
    ):
        paths = ["intermediates", step_output_handle.step_key, step_output_handle.output_name]
        return context.resources.pyspark.spark_session.read.parquet(
            os.path.join(intermediate_storage.root, *paths)
        )

    @classmethod
    def required_resource_keys(cls):
        return frozenset({"pyspark"})


DataFrame = PythonObjectDagsterType(
    python_type=NativeSparkDataFrame,
    name="PySparkDataFrame",
    description="A PySpark data frame.",
    auto_plugins=[
        SparkDataFrameS3StoragePlugin,
        SparkDataFrameADLS2StoragePlugin,
        SparkDataFrameFilesystemStoragePlugin,
    ],
    loader=dataframe_loader,
    materializer=dataframe_materializer,
)
from datetime import date

import pandas as pd

from dagster import (
    Field,
    Int,
    PythonObjectDagsterType,
    Selector,
    input_hydration_config,
    make_python_type_usable_as_dagster_type,
)

make_python_type_usable_as_dagster_type(pd.DataFrame, PythonObjectDagsterType(pd.DataFrame))


@input_hydration_config(
    Selector({"date": {"year": Field(Int), "month": Field(Int), "day": Field(Int)}})
)
def parse_date(context, selector):
    date_selector = selector["date"]
    return date(date_selector["year"], date_selector["month"], date_selector["day"])


make_python_type_usable_as_dagster_type(
    date, PythonObjectDagsterType(date, input_hydration_config=parse_date)
)
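# Hedged usage sketch: with the hydration config registered on ``date`` above, a solid
# annotated with the plain ``date`` type can be fed from run config through the selector.
# The solid name ``echo_date`` and the use of the legacy ``execute_solid`` helper are
# illustrative assumptions, not part of the original module.
from dagster import execute_solid, solid


@solid
def echo_date(_, run_date: date) -> str:
    return run_date.isoformat()


def test_parse_date_from_run_config():
    result = execute_solid(
        echo_date,
        run_config={
            "solids": {
                "echo_date": {
                    "inputs": {"run_date": {"date": {"year": 2020, "month": 1, "day": 1}}}
                }
            }
        },
    )
    assert result.success
    assert result.output_value() == "2020-01-01"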