def test_register_after_solid_definition():
    """Registering a Dagster type for a class already used by a solid must fail.

    ``MyClass`` is first used as a solid's return annotation; only afterwards
    is ``make_python_type_usable_as_dagster_type`` called for it, which is
    expected to raise ``DagsterInvalidDefinitionError``.
    """

    class MyClass:
        pass

    # Defining the solid consumes ``MyClass`` as an output annotation before
    # any explicit mapping has been registered for it.
    @solid
    def _my_solid(_) -> MyClass:
        return MyClass()

    def _accept_anything(_, _a):
        # Type check that passes for every value.
        return True

    late_dagster_type = DagsterType(name="aaaa", type_check_fn=_accept_anything)

    with pytest.raises(DagsterInvalidDefinitionError):
        make_python_type_usable_as_dagster_type(MyClass, late_dagster_type)
def test_make_usable_as_dagster_type():
    """A registered Python class can be used directly in solid annotations.

    ``EvenType`` is mapped to ``EvenDagsterType`` and then used as both the
    input and output annotation of ``double_even``; executing the solid with
    an ``EvenType`` input must succeed.
    """

    class EvenType:
        def __init__(self, num):
            # Compare by value. ``is 0`` tested object identity, which only
            # happened to work for small CPython ints and triggers a
            # SyntaxWarning on Python 3.8+.
            assert num % 2 == 0
            self.num = num

    EvenDagsterType = PythonObjectDagsterType(EvenType, name="EvenDagsterType")
    make_python_type_usable_as_dagster_type(EvenType, EvenDagsterType)

    @solid
    def double_even(_, even_num: EvenType) -> EvenType:
        return EvenType(even_num.num * 2)

    assert execute_solid(double_even, input_values={"even_num": EvenType(2)}).success
def test_make_usable_as_dagster_type_called_twice():
    """Re-registering the same mapping is allowed; a conflicting one is not."""

    class AType:
        pass

    first_mapping = PythonObjectDagsterType(AType, name="ADagsterType")
    conflicting_mapping = PythonObjectDagsterType(AType, name="BDagsterType")

    # Registering the identical mapping a second time should not raise an error.
    for _attempt in range(2):
        make_python_type_usable_as_dagster_type(AType, first_mapping)

    # A different Dagster type for the same Python class must be rejected.
    with pytest.raises(DagsterInvalidDefinitionError):
        make_python_type_usable_as_dagster_type(AType, conflicting_mapping)
# start-snippet
from pathlib import Path

from dagster import graph, make_python_type_usable_as_dagster_type, op, repository
from dagster.core.definitions.no_step_launcher import no_step_launcher
from dagster_aws.emr import emr_pyspark_step_launcher
from dagster_aws.s3 import s3_pickle_io_manager, s3_resource
from dagster_pyspark import DataFrame as DagsterPySparkDataFrame
from dagster_pyspark import pyspark_resource
from pyspark.sql import DataFrame, Row
from pyspark.sql.types import IntegerType, StringType, StructField, StructType

# Make pyspark.sql.DataFrame map to dagster_pyspark.DataFrame
make_python_type_usable_as_dagster_type(python_type=DataFrame, dagster_type=DagsterPySparkDataFrame)


# Builds a three-row (name, age) Spark DataFrame using the session exposed by
# the "pyspark" resource.
@op(required_resource_keys={"pyspark", "pyspark_step_launcher"})
def make_people(context) -> DataFrame:
    people_schema = StructType(
        [
            StructField("name", StringType()),
            StructField("age", IntegerType()),
        ]
    )
    people_rows = [
        Row(name=person_name, age=person_age)
        for person_name, person_age in (("Thom", 51), ("Jonny", 48), ("Nigel", 49))
    ]
    spark = context.resources.pyspark.spark_session
    return spark.createDataFrame(people_rows, people_schema)


# Keeps only the rows whose "age" column is strictly greater than 50.
@op(required_resource_keys={"pyspark_step_launcher"})
def filter_over_50(people: DataFrame) -> DataFrame:
    is_over_50 = people["age"] > 50
    return people.filter(is_over_50)


# Returns the number of rows in the given DataFrame.
@op(required_resource_keys={"pyspark_step_launcher"})
def count_people(people: DataFrame) -> int:
    row_count = people.count()
    return row_count
check, composite_solid, make_python_type_usable_as_dagster_type, solid, ) from dagster.core.types.dagster_type import create_string_type from .cache_file_from_s3 import cache_file_from_s3 from .unzip_file_handle import unzip_file_handle SqlTableName = create_string_type("SqlTableName", description="The name of a database table") # Make pyspark.sql.DataFrame map to dagster_pyspark.DataFrame make_python_type_usable_as_dagster_type( python_type=DataFrame, dagster_type=dagster_pyspark.DataFrame ) PARQUET_SPECIAL_CHARACTERS = r"[ ,;{}()\n\t=]" def _notebook_path(name): return os.path.join(os.path.dirname(os.path.abspath(__file__)), "notebooks", name) # start_solids_marker_3 def notebook_solid(name, notebook_path, input_defs, output_defs, required_resource_keys): return define_dagstermill_solid( name, _notebook_path(notebook_path),
from dagster import (
    Field,
    Int,
    PythonObjectDagsterType,
    Selector,
    input_hydration_config,
    make_python_type_usable_as_dagster_type,
)
from datetime import date
import pandas as pd

# Let pandas DataFrames be used directly as solid input/output annotations.
make_python_type_usable_as_dagster_type(pd.DataFrame, PythonObjectDagsterType(pd.DataFrame))

# Config shape accepted when a date is supplied through configuration:
# a selector with a single "date" branch holding integer year/month/day.
_DATE_SELECTOR = Selector(
    {
        "date": {
            "year": Field(Int),
            "month": Field(Int),
            "day": Field(Int),
        }
    }
)


@input_hydration_config(_DATE_SELECTOR)
def parse_date(context, selector):
    """Build a ``datetime.date`` from the "date" branch of the selector config."""
    parts = selector["date"]
    year, month, day = parts["year"], parts["month"], parts["day"]
    return date(year, month, day)


# Register ``date`` with a Dagster type that hydrates inputs via ``parse_date``.
make_python_type_usable_as_dagster_type(
    date,
    PythonObjectDagsterType(date, input_hydration_config=parse_date),
)
import argparse import csv from dataclasses import dataclass import logging import functools import sys from typing import Any, Callable, NoReturn, TextIO, TypeVar, cast from dagster import make_python_type_usable_as_dagster_type from dagster.core.types.dagster_type import String as DagsterString from dagster_utils.contrib.google import default_google_access_token from dagster_utils.contrib.data_repo.typing import JobId from data_repo_client import ApiClient, Configuration, RepositoryApi, ApiException, ResourcesApi make_python_type_usable_as_dagster_type(JobId, DagsterString) data_repo_host = { "dev": "https://jade.datarepo-dev.broadinstitute.org/", "prod": "https://jade-terra.datarepo-prod.broadinstitute.org/", "real_prod": "https://data.terra.bio/" } data_repo_profile_ids = { "dev": "390e7a85-d47f-4531-b612-165fc977d3bd", "prod": "db61c343-6dfe-4d14-84e9-60ddf97ea73f" } @dataclass class ProblemCount:
from dagster import ( execute_pipeline, make_python_type_usable_as_dagster_type, pipeline, repository, solid, ) from dagster_pandas import DataFrame as DagsterPandasDataFrame from pandas import DataFrame make_python_type_usable_as_dagster_type(python_type=DataFrame, dagster_type=DagsterPandasDataFrame) # start_intro_0 @solid(description="Calculates the grams of sugar per cup of each kind of cereal.") def sugar_by_volume(_, cereals: DataFrame) -> DataFrame: df = cereals[["name"]].copy() df["sugar_per_cup"] = cereals["sugars"] / cereals["cups"] return df @solid(description="Finds the sugar-per-cup cutoff for the top quartile of cereals.") def top_quartile_cutoff(_, cereals: DataFrame) -> float: return cereals["sugar_per_cup"].quantile(0.75) @solid(description="Selects cereals whose sugar-per-cup exceeds the given cutoff.") def sugariest_cereals(_, cereals: DataFrame, cutoff: float) -> DataFrame: return cereals[cereals["sugar_per_cup"] > cutoff] @pipeline
from dagster import (
    solid,
    SolidExecutionContext,
    Field,
    Array,
    String,
    PythonObjectDagsterType,
    make_python_type_usable_as_dagster_type,
)
from typing import Any, Optional, List, TYPE_CHECKING
from azmeta.access.specifications import AzureComputeSpecifications, load_compute_specifications

# Dagster type wrapping AzureComputeSpecifications, registered so the plain
# Python class can be used in solid annotations.
AzureComputeSpecificationsDagsterType = PythonObjectDagsterType(AzureComputeSpecifications)
make_python_type_usable_as_dagster_type(
    AzureComputeSpecifications,
    AzureComputeSpecificationsDagsterType,
)

_LOAD_COMPUTE_SPECS_CONFIG = {
    'subscription': Field(
        String,
        is_required=False,
        description='The subscription ID to list SKUs from.',
    ),
}


# Loads the compute specifications, passing the solid's logger through.
# NOTE(review): the optional 'subscription' config value is declared but not
# read in this body - confirm whether that is intentional.
@solid(config_schema=_LOAD_COMPUTE_SPECS_CONFIG)
def load_compute_specs(context: SolidExecutionContext) -> AzureComputeSpecifications:
    specifications = load_compute_specifications(logger=context.log)
    return specifications
from dagster import PythonObjectDagsterType, make_python_type_usable_as_dagster_type, op


class EvenType:
    """Holder for a number that must be even."""

    def __init__(self, num):
        # Compare by value. ``is 0`` tested object identity, which only
        # happened to work for small CPython ints and triggers a
        # SyntaxWarning on Python 3.8+.
        assert num % 2 == 0
        self.num = num


# Map the plain Python class to a named Dagster type so it can be used
# directly in op annotations.
EvenDagsterType = PythonObjectDagsterType(EvenType, name="EvenDagsterType")
make_python_type_usable_as_dagster_type(EvenType, EvenDagsterType)


# Returns a new EvenType holding twice the input's number.
@op
def double_even(even_num: EvenType) -> EvenType:
    return EvenType(even_num.num * 2)
"""
Complex type signatures that appear multiple times throughout the code base
can live here, for easy reference and descriptive naming.
"""

from typing import NamedTuple

from dagster import make_python_type_usable_as_dagster_type
from dagster.core.types.dagster_type import String as DagsterString


class HcaScratchDatasetName(str):
    """String subclass used to name a scratch dataset."""


# Registered against Dagster's String type so the subclass can be used in
# annotations.
make_python_type_usable_as_dagster_type(HcaScratchDatasetName, DagsterString)


class MetadataType(str):
    """String subclass used to name a metadata type."""


class MetadataTypeFanoutResult(NamedTuple):
    """Record grouping a scratch dataset name, a metadata type and a path."""

    # Name of the scratch dataset this result belongs to.
    scratch_dataset_name: HcaScratchDatasetName
    # Metadata type this result refers to.
    metadata_type: MetadataType
    # Path associated with this result.
    path: str