コード例 #1
0
ファイル: job.py プロジェクト: sd2k/dagster
 def config_type_pipeline_run(cls):
     """Return the config schema intended to be set at pipeline execution time.

     Returns:
         dict: Maps each config key to its ``Field`` definition. None of the
         fields is required; ``image_pull_policy`` defaults to "IfNotPresent".
     """
     return {
         "job_image": Field(
             Noneable(StringSource),
             is_required=False,
             # Trailing space keeps the example from running into the sentence before it.
             description=(
                 "Docker image to use for launched task Jobs. If the repository is not "
                 "loaded from a GRPC server, then this field is required. If the repository is "
                 "loaded from a GRPC server, then leave this field empty. "
                 '(Ex: "mycompany.com/dagster-k8s-image:latest").'
             ),
         ),
         "image_pull_policy": Field(
             StringSource,
             is_required=False,
             default_value="IfNotPresent",
             description=(
                 "Image pull policy to set on the launched task Job Pods. Defaults to "
                 '"IfNotPresent".'
             ),
         ),
         "image_pull_secrets": Field(
             Noneable(Array(Shape({"name": StringSource}))),
             is_required=False,
             description=(
                 "(Advanced) Specifies that Kubernetes should get the credentials from "
                 "the Secrets named in this list."
             ),
         ),
         "service_account_name": Field(
             Noneable(StringSource),
             is_required=False,
             description=(
                 "(Advanced) Override the name of the Kubernetes service account under "
                 "which to run the Job."
             ),
         ),
         "env_config_maps": Field(
             Noneable(Array(StringSource)),
             is_required=False,
             # "See: " needs the trailing space so the URL is not glued to the colon.
             description=(
                 "A list of custom ConfigMapEnvSource names from which to draw "
                 "environment variables (using ``envFrom``) for the Job. Default: ``[]``. See: "
                 "https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#define-an-environment-variable-for-a-container"
             ),
         ),
         "env_secrets": Field(
             Noneable(Array(StringSource)),
             is_required=False,
             description=(
                 "A list of custom Secret names from which to draw environment "
                 "variables (using ``envFrom``) for the Job. Default: ``[]``. See: "
                 "https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#configure-all-key-value-pairs-in-a-secret-as-container-environment-variables"
             ),
         ),
     }
コード例 #2
0
 def config_type_pipeline_run(cls):
     '''Return the config schema intended to be set at pipeline execution time.

     Returns:
         dict: Maps each config key to its ``Field`` definition. ``job_image``
         is the only required field.
     '''
     return {
         'job_image': Field(
             StringSource,
             is_required=True,
             description=(
                 'Docker image to use for launched task Jobs '
                 '(e.g. "mycompany.com/dagster-k8s-image:latest").'
             ),
         ),
         'image_pull_policy': Field(
             StringSource,
             is_required=False,
             default_value='IfNotPresent',
             description=(
                 'Image pull policy to set on the launched task Job Pods. Defaults to '
                 '"IfNotPresent".'
             ),
         ),
         'image_pull_secrets': Field(
             Noneable(Array(Shape({'name': StringSource}))),
             is_required=False,
             description=(
                 '(Advanced) Specifies that Kubernetes should get the credentials from '
                 'the Secrets named in this list.'
             ),
         ),
         'service_account_name': Field(
             Noneable(StringSource),
             is_required=False,
             description=(
                 '(Advanced) Override the name of the Kubernetes service account under '
                 'which to run the Job.'
             ),
         ),
         'env_config_maps': Field(
             Noneable(Array(StringSource)),
             is_required=False,
             # "See: " needs the trailing space so the URL is not glued to the colon.
             description=(
                 'A list of custom ConfigMapEnvSource names from which to draw '
                 'environment variables (using ``envFrom``) for the Job. Default: ``[]``. See: '
                 'https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#define-an-environment-variable-for-a-container'
             ),
         ),
         'env_secrets': Field(
             Noneable(Array(StringSource)),
             is_required=False,
             description=(
                 'A list of custom Secret names from which to draw environment '
                 'variables (using ``envFrom``) for the Job. Default: ``[]``. See: '
                 'https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#configure-all-key-value-pairs-in-a-secret-as-container-environment-variables'
             ),
         ),
     }
コード例 #3
0
def test_noneable_string_source_array():
    """Check ``Noneable(Array(StringSource))`` with empty, ``None`` and env-backed values."""

    def env_backed_value():
        # Fresh list per call, mirroring the separate literals used for each check.
        return ["test", {"env": "DAGSTER_TEST_ENV_VAR"}]

    assert process_config(Noneable(Array(StringSource)), []).success
    assert process_config(Noneable(Array(StringSource)), None).success

    # While the variable is unset, the first error must name it.
    unset_message = (
        'You have attempted to fetch the environment variable "DAGSTER_TEST_ENV_VAR" '
        "which is not set. In order for this execution to succeed it must be set in "
        "this environment."
    )
    unset_result = process_config(Noneable(Array(StringSource)), env_backed_value())
    assert unset_message in unset_result.errors[0].message

    # Once the variable is provided, processing succeeds.
    with environ({"DAGSTER_TEST_ENV_VAR": "baz"}):
        set_result = process_config(Noneable(Array(StringSource)), env_backed_value())
        assert set_result.success
コード例 #4
0
def test_basic_list():
    '''Meta for ``Array(int)`` has an ``Array``-prefixed key and one child key, ``Int``.'''
    array_meta = meta_from_dagster_type(Array(int))
    assert array_meta.key.startswith('Array')

    keys_of_children = array_meta.get_child_type_keys()
    assert keys_of_children
    assert len(keys_of_children) == 1
    assert keys_of_children[0] == 'Int'
コード例 #5
0
def test_basic_list():
    """Snapshot for ``Array(int)`` has an ``Array``-prefixed key and one child key, ``Int``."""
    array_snapshot = snap_from_dagster_type(Array(int))
    assert array_snapshot.key.startswith("Array")

    keys_of_children = array_snapshot.get_child_type_keys()
    assert keys_of_children
    assert len(keys_of_children) == 1
    assert keys_of_children[0] == "Int"
コード例 #6
0
def test_kitchen_sink_break_out():
    '''Check type-param and inner-type refs for an array of nested shapes.'''
    inner_shape = resolve_to_config_type({
        'list_list': [[int]],
        'nested_selector': Selector({'some_field': int, 'list': Noneable([bool])}),
    })
    outer_shape = resolve_to_config_type({
        'opt_list_of_int': Field([int], is_optional=True),
        'nested_dict': Field(inner_shape),
    })
    sink_type = Array(outer_shape)

    outer_key = outer_shape.key
    sink_meta = meta_from_dagster_type(sink_type)

    # The array refers to exactly one type: the shape it contains.
    assert len(sink_meta.type_param_refs) == 1
    assert sink_meta.type_param_refs[0].key == outer_key
    assert len(sink_meta.inner_type_refs) == 1
    assert sink_meta.inner_type_refs[0].key == outer_key

    outer_meta = meta_from_dagster_type(outer_shape)
    assert outer_meta.type_param_refs is None
    # List[int], Int, Shape.XXX
    assert len(outer_meta.inner_type_refs) == 3

    observed_keys = sorted([ref.key for ref in outer_meta.inner_type_refs])
    expected_keys = sorted([inner_shape.key, 'Int', 'Array.Int'])
    assert observed_keys == expected_keys
コード例 #7
0
def test_basic_list():
    '''Meta for ``Array(int)`` has one inner type ref (``Int``) and no enum values.'''
    array_meta = meta_from_dagster_type(Array(int))
    assert array_meta.key.startswith('Array')

    assert array_meta.inner_type_refs
    assert len(array_meta.inner_type_refs) == 1
    assert array_meta.inner_type_refs[0].key == 'Int'

    assert array_meta.enum_values is None
コード例 #8
0
def test_noneable_string_source_array():
    '''Check ``Noneable(Array(StringSource))`` with empty, ``None`` and env-backed values.'''
    assert process_config(Noneable(Array(StringSource)), []).success
    assert process_config(Noneable(Array(StringSource)), None).success

    # With the variable unset, the first reported error must name it.
    missing_var_message = (
        'You have attempted to fetch the environment variable "DAGSTER_TEST_ENV_VAR" '
        'which is not set. In order for this execution to succeed it must be set in '
        'this environment.'
    )
    failed = process_config(
        Noneable(Array(StringSource)),
        ['test', {'env': 'DAGSTER_TEST_ENV_VAR'}],
    )
    assert missing_var_message in failed.errors[0].message

    # With the variable set, the same value processes successfully.
    with environ({'DAGSTER_TEST_ENV_VAR': 'baz'}):
        resolved = process_config(
            Noneable(Array(StringSource)),
            ['test', {'env': 'DAGSTER_TEST_ENV_VAR'}],
        )
        assert resolved.success
コード例 #9
0
def test_list_nullable_int():
    '''``Array(Noneable(int))`` accepts lists of ints and ``None``; rejects ``None`` and strings.'''
    lni = resolve_to_config_type(Array(Noneable(int)))

    # Values expected to validate, checked in order.
    for accepted in ([1], [1, 2], [], [None], [1, None]):
        assert validate_config(lni, accepted).success

    # The array itself may not be None, and elements may not be strings.
    for rejected in (None, [1, 'absdf']):
        assert not validate_config(lni, rejected).success
コード例 #10
0
def _base_config():
    '''Build the shared config schema: all fields optional, two with float defaults.'''
    schema = {
        'error_rate': Field(float, is_required=False, default_value=0.0),
        'sleep': Field(float, is_required=False, default_value=0.5),
        'materialization_key_list': Field(Array(str), is_required=False),
    }
    # The plain string-valued materialization fields share one definition.
    for suffix in ('key', 'text', 'url', 'path'):
        schema['materialization_' + suffix] = Field(str, is_required=False)
    schema['materialization_json'] = Field(Permissive(), is_required=False)
    schema['materialization_value'] = Field(float, is_required=False)
    return schema
コード例 #11
0
ファイル: longitudinal.py プロジェクト: sd2k/dagster
def _base_config():
    """Build the shared config schema: all fields optional, two with float defaults."""

    def optional(config_type):
        # Every field without a default is declared the same way.
        return Field(config_type, is_required=False)

    return {
        "error_rate": Field(float, is_required=False, default_value=0.0),
        "sleep": Field(float, is_required=False, default_value=0.5),
        "materialization_key_list": optional(Array(str)),
        "materialization_key": optional(str),
        "materialization_text": optional(str),
        "materialization_url": optional(str),
        "materialization_path": optional(str),
        "materialization_json": optional(Permissive()),
        "materialization_value": optional(float),
    }
コード例 #12
0
ファイル: launcher.py プロジェクト: trevenrawr/dagster
 def config_type(cls):
     """Return the config schema for this launcher.

     Returns:
         dict: Maps each config key to its ``Field`` definition. Every field
         is optional; ``container_name``, ``secrets_tag`` and
         ``include_sidecars`` carry defaults.
     """
     return {
         "task_definition": Field(
             StringSource,
             is_required=False,
             description=(
                 "The task definition to use when launching new tasks. "
                 "If none is provided, each run will create its own task "
                 "definition."
             ),
         ),
         "container_name": Field(
             StringSource,
             is_required=False,
             default_value="run",
             description=(
                 "The container name to use when launching new tasks. Defaults to 'run'."
             ),
         ),
         "secrets": Field(
             # Each entry is either a bare string or a {"name", "valueFrom"} mapping.
             Array(
                 ScalarUnion(
                     scalar_type=str,
                     non_scalar_schema={"name": StringSource, "valueFrom": StringSource},
                 )
             ),
             is_required=False,
             description=(
                 "An array of AWS Secrets Manager secrets. These secrets will "
                 "be mounted as environment variables in the container. See "
                 "https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_Secret.html."
             ),
         ),
         "secrets_tag": Field(
             Noneable(StringSource),
             is_required=False,
             default_value="dagster",
             description=(
                 "AWS Secrets Manager secrets with this tag will be mounted as "
                 "environment variables in the container. Defaults to 'dagster'."
             ),
         ),
         "include_sidecars": Field(
             bool,
             is_required=False,
             default_value=False,
             description=(
                 "Whether each run should use the same sidecars as the task that launches it. "
                 "Defaults to False."
             ),
         ),
     }
コード例 #13
0
ファイル: config.py プロジェクト: keyz/dagster
def python_logs_config_schema():
    """Return the optional ``Field`` describing Python logging configuration.

    The field wraps three optional entries: ``managed_python_loggers``,
    ``python_log_level`` and ``dagster_handler_config`` (which in turn has
    optional ``handlers`` and ``formatters`` dicts).
    """
    managed_loggers = Field(Array(str), is_required=False)
    log_level = Field(str, is_required=False)
    handler_config = Field(
        {
            "handlers": Field(dict, is_required=False),
            "formatters": Field(dict, is_required=False),
        },
        is_required=False,
    )

    schema = {
        "managed_python_loggers": managed_loggers,
        "python_log_level": log_level,
        "dagster_handler_config": handler_config,
    }
    return Field(schema, is_required=False)
コード例 #14
0
 def config_type(cls):
     """Return the config schema for this launcher.

     Returns:
         dict: Maps each config key to its ``Field`` definition. Every field
         is optional; ``container_name`` and ``secrets_tag`` carry defaults.
     """
     return {
         "task_definition": Field(
             StringSource,
             is_required=False,
             description=(
                 "The task definition to use when launching new tasks. "
                 "If none is provided, each run will create its own task "
                 "definition."
             ),
         ),
         "container_name": Field(
             StringSource,
             is_required=False,
             default_value="run",
             description=(
                 "The container name to use when launching new tasks. Defaults to 'run'."
             ),
         ),
         "secrets": Field(
             Array(StringSource),
             is_required=False,
             description=(
                 "An array of AWS Secrets Manager secrets arns. These secrets will "
                 "be mounted as environment variables in the container."
             ),
         ),
         "secrets_tag": Field(
             StringSource,
             is_required=False,
             default_value="dagster",
             description=(
                 "AWS Secrets Manager secrets with this tag will be mounted as "
                 "environment variables in the container. Defaults to 'dagster'."
             ),
         ),
     }
コード例 #15
0
ファイル: solids.py プロジェクト: zuik/dagster
    with open(target_path, "w") as fd:
        fd.write(requests.get(url).text)

    return target_path


@solid(required_resource_keys={"db"})
def load_cereals_from_csv(context, csv_file_path):
    """Read the CSV at ``csv_file_path`` and write it to the ``cereals`` table.

    Any existing ``cereals`` table is dropped (with cascade) first, using a
    connection obtained from the ``db`` resource.
    """
    frame = pandas.read_csv(csv_file_path)
    with context.resources.db.connect() as connection:
        connection.execute("drop table if exists cereals cascade")
        frame.to_sql(name="cereals", con=connection)


@solid(
    config_schema={"channels": Array(str)},
    required_resource_keys={"slack"},
)
def post_plot_to_slack(context, plot_path):
    """Upload the file at ``plot_path`` through the ``slack`` resource.

    The configured ``channels`` are passed as one comma-joined string.
    """
    upload = context.resources.slack.files_upload
    channel_list = ",".join(context.solid_config["channels"])
    upload(channels=channel_list, file=plot_path)


# start_solid_marker_0
run_cereals_models = dbt_cli_run.configured(
    name="run_cereals_models",
    config_or_config_fn={
        "project-dir": PROJECT_DIR,
        "profiles-dir": PROFILES_DIR,
    },
)
# end_solid_marker_0

test_cereals_models = dbt_cli_test.configured(
    config_or_config_fn={"project-dir": PROJECT_DIR, "profiles-dir": PROFILES_DIR},
コード例 #16
0
    return output.value


@solid(
    description="A solid to invoke dbt run over RPC.",
    input_defs=[InputDefinition(name="start_after", dagster_type=Nothing)],
    output_defs=[
        OutputDefinition(
            name="request_token",
            dagster_type=String,
            description="The request token of the invoked dbt run.",
        )
    ],
    config_schema={
        "models": Field(
            config=Noneable(Array(String)),
            default_value=None,
            is_required=False,
            description="The dbt models to run.",
        ),
        "exclude": Field(
            config=Noneable(Array(String)),
            default_value=None,
            is_required=False,
            description="The dbt models to exclude.",
        ),
    },
    required_resource_keys={"dbt_rpc"},
    tags={"kind": "dbt"},
)
def dbt_rpc_run(context: SolidExecutionContext) -> String:
コード例 #17
0
ファイル: repo.py プロジェクト: zuik/dagster
)
def rollup_pipeline():
    rollup_data()


@resource(
    config_schema={
        "string": str,
        "string_source": StringSource,
        "int_source": IntSource,
        "number": int,
        "boolean": bool,
        "not_required": Field(bool, is_required=False),
        "default_value": Field(str, default_value="default_value"),
        "enum": Enum(
            "CowboyType",
            [EnumValue(member) for member in ("good", "bad", "ugly")],
        ),
        "array": Array(String),
        "selector": Selector({option: str for option in ("a", "b", "c")}),
        "noneable_array": Noneable(Array(String)),
        "noneable_string": Noneable(String),
    }
)
def my_resource(_):
    """Resource with a schema covering many config types; it always provides ``None``."""
    return None

コード例 #18
0
from typing import Dict
from dagster import solid, SolidExecutionContext, Field, Array, String
from azmeta.access.advisor import load_resize_recommendations


@solid(
    config_schema={
        'subscriptions': Field(
            Array(String),
            description='The subscriptions to query in Azure Advisor.',
        ),
    }
)
def get_recommendations(context: SolidExecutionContext) -> Dict[str,str]:
    """Map each resize recommendation's key to its ``targetSku`` extended property.

    Recommendations are loaded for the configured ``subscriptions``.
    """
    subscription_ids = context.solid_config['subscriptions']
    resize_recs = load_resize_recommendations(subscription_ids)

    target_skus = {}
    for rec_key, rec in resize_recs.items():
        target_skus[rec_key] = rec.extended_properties['targetSku']
    return target_skus
コード例 #19
0
ファイル: base.py プロジェクト: zuik/dagster
import json
import os

import boto3
import requests
from dagster import Array, pipeline, repository, solid
from slack import WebClient  # pylint:disable=import-error


@solid(config_schema={"portfolio": Array(str)})
def query_stock_market_data(context) -> dict:
    """Fetch a quote for every ticker symbol in the configured ``portfolio``.

    Returns:
        dict: Maps each ticker symbol to the decoded JSON body of its response.

    A non-success HTTP status raises via ``raise_for_status``.
    """
    symbols = context.solid_config["portfolio"]

    quotes_by_symbol = {}
    for symbol in symbols:
        # The API key is read from the DEMO_STOCK_API_KEY environment variable.
        url_parts = {
            "API_URL": "https://financialmodelingprep.com/api/v3",
            "API_KEY": os.getenv("DEMO_STOCK_API_KEY"),
            "ticker_symbol": symbol,
        }
        quote_url = "{API_URL}/quote/{ticker_symbol}?apikey={API_KEY}".format(**url_parts)

        reply = requests.get(quote_url)
        reply.raise_for_status()
        quotes_by_symbol[symbol] = reply.json()

    return quotes_by_symbol


@solid
def compute_summary_message(_, json_responses: dict) -> str:
    header = "Daily Portfolio Update:\n"
コード例 #20
0
ファイル: job.py プロジェクト: bbbbbgit/dagster
 def config_type(cls):
     '''Return the config schema for launched task Jobs.

     Returns:
         dict: Maps each config key to its ``Field`` definition. ``job_image``,
         ``instance_config_map`` and ``postgres_password_secret`` are required;
         the remaining fields are optional.
     '''
     return {
         'job_image': Field(
             str,
             is_required=True,
             description=(
                 'Docker image to use for launched task Jobs '
                 '(e.g. "mycompany.com/dagster-k8s-image:latest").'
             ),
         ),
         'dagster_home': Field(
             str,
             is_required=False,
             default_value=DAGSTER_HOME_DEFAULT,
             description=(
                 'The location of DAGSTER_HOME in the Job container; this is where the '
                 '``dagster.yaml`` file will be mounted from the instance ConfigMap specified here. '
                 'Defaults to /opt/dagster/dagster_home.'
             ),
         ),
         'image_pull_policy': Field(
             str,
             is_required=False,
             default_value='IfNotPresent',
             description=(
                 'Image pull policy to set on the launched task Job Pods. Defaults to '
                 '"IfNotPresent".'
             ),
         ),
         'image_pull_secrets': Field(
             Array(str),
             is_required=False,
             description=(
                 '(Advanced) Specifies that Kubernetes should get the credentials from '
                 'the Secrets named in this list.'
             ),
         ),
         'service_account_name': Field(
             Noneable(str),
             is_required=False,
             description=(
                 '(Advanced) Override the name of the Kubernetes service account under '
                 'which to run the Job.'
             ),
         ),
         'instance_config_map': Field(
             str,
             is_required=True,
             description=(
                 'The ``name`` of an existing Volume to mount into the pod in order to '
                 'provide a ConfigMap for the Dagster instance. This Volume should contain a '
                 '``dagster.yaml`` with appropriate values for run storage, event log storage, etc.'
             ),
         ),
         'postgres_password_secret': Field(
             str,
             is_required=True,
             # Space after "Job Pod." keeps the two sentences from running together.
             description=(
                 'The name of the Kubernetes Secret where the postgres password can be '
                 'retrieved. Will be mounted and supplied as an environment variable to the Job Pod. '
                 'Secret must contain the key ``"postgresql-password"`` which will be exposed in '
                 'the Job environment as the environment variable ``DAGSTER_PG_PASSWORD``.'
             ),
         ),
         'env_config_maps': Field(
             Noneable(list),
             is_required=False,
             # "See: " needs the trailing space so the URL is not glued to the colon.
             description=(
                 'A list of custom ConfigMapEnvSource names from which to draw '
                 'environment variables (using ``envFrom``) for the Job. Default: ``[]``. See: '
                 'https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#define-an-environment-variable-for-a-container'
             ),
         ),
         'env_secrets': Field(
             Noneable(list),
             is_required=False,
             description=(
                 'A list of custom Secret names from which to draw environment '
                 'variables (using ``envFrom``) for the Job. Default: ``[]``. See: '
                 'https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#configure-all-key-value-pairs-in-a-secret-as-container-environment-variables'
             ),
         ),
     }
コード例 #21
0
ファイル: solids.py プロジェクト: yingjiebyron/dagster
    with open(target_path, "w") as fd:
        fd.write(requests.get(url).text)

    return target_path


@solid(required_resource_keys={"db"})
def load_cereals_from_csv(context, csv_file_path):
    """Replace the ``cereals`` table with the contents of the CSV at ``csv_file_path``.

    The table is dropped (with cascade) if it exists, then rewritten over a
    connection opened from the ``db`` resource.
    """
    csv_frame = pandas.read_csv(csv_file_path)
    with context.resources.db.connect() as db_conn:
        db_conn.execute("drop table if exists cereals cascade")
        csv_frame.to_sql(name="cereals", con=db_conn)


@solid(
    config_schema={"channels": Array(str)},
    required_resource_keys={"slack"},
)
def post_plot_to_slack(context, plot_path):
    """Upload ``plot_path`` via the ``slack`` resource to the configured channels.

    The ``channels`` config list is sent as a single comma-separated string.
    """
    send_file = context.resources.slack.files_upload
    joined_channels = ",".join(context.solid_config["channels"])
    send_file(channels=joined_channels, file=plot_path)


# ``dbt_cli_run`` configured with this project's directories, named "run_cereals_models".
run_cereals_models = dbt_cli_run.configured(
    config_or_config_fn={"project-dir": PROJECT_DIR, "profiles-dir": PROFILES_DIR},
    name="run_cereals_models",
)
コード例 #22
0
ファイル: ops.py プロジェクト: keyz/dagster
            default_value=None,
            description=
            "The maximum time that will waited before this operation is timed out. By "
            "default, this will never time out.",
        ),
        "yield_materializations":
        Field(
            config=Bool,
            default_value=True,
            description=
            ("If True, materializations corresponding to the results of the Fivetran sync will "
             "be yielded when the op executes."),
        ),
        "asset_key_prefix":
        Field(
            config=Array(str),
            default_value=["fivetran"],
            description=
            ("If provided and yield_materializations is True, these components will be used to "
             "prefix the generated asset keys."),
        ),
    },
    tags={"kind": "fivetran"},
)
def fivetran_sync_op(context):
    """
    Executes a Fivetran sync for a given ``connector_id``, and polls until that sync
    completes, raising an error if it is unsuccessful. It outputs a FivetranOutput which contains
    the details of the Fivetran connector after the sync successfully completes, as well as details
    about which tables the sync updates.
コード例 #23
0
        yield EventMetadataEntry(label="num rows written to db",
                                 description=None,
                                 entry_data=EventMetadata.int(len(obj)))

    def get_output_asset_key(self, context):
        """Return the asset key ``["my_database", <table_name>]``.

        ``<table_name>`` is read from the ``table_name`` entry of ``context.metadata``.
        """
        key_path = ["my_database", context.metadata["table_name"]]
        return AssetKey(key_path)

    def get_output_asset_partitions(self, context):
        """Return the configured ``partitions`` as a set (empty when not configured)."""
        configured_partitions = context.config.get("partitions", [])
        return set(configured_partitions)


@io_manager(
    output_config_schema={
        "partitions": Field(Array(str), is_required=False),
    }
)
def my_db_io_manager(_):
    """Create a ``MyDatabaseIOManager``; outputs may optionally configure ``partitions``."""
    manager = MyDatabaseIOManager()
    return manager


@solid(
    output_defs=[
        OutputDefinition(io_manager_key="my_db_io_manager",
                         metadata={"table_name": "raw_actions"}),
    ], )
def download_data(_):
    n_entries = random.randint(100, 1000)

    def user_id():
        return "".join(random.choices(string.ascii_uppercase, k=10))
コード例 #24
0
if not TYPE_CHECKING:
    # Outside type checking, build the dagster-pandas type with its two string columns.
    ResourcesDataFrame = create_dagster_pandas_dataframe_type(
        name='ResourcesDataFrame',
        columns=[
            PandasColumn.string_column('resource_id'),
            PandasColumn.string_column('subscription_id'),
        ],
    )
else:
    # Type checkers only see a permissive alias.
    ResourcesDataFrame = Any  # DataFrame # Pandas has no type info yet.


@solid(
    config_schema={
        'subscriptions':
        Field(Array(String),
              description='The subscriptions to query in the Resource Graph.'),
        'filters':
        Field(String,
              is_required=False,
              description='Conditions for a KQL where operator.'),
        'custom_projections':
        Field(String,
              is_required=False,
              description='Assignments for a KQL project operator.'),
    })
def query_vm_resources(context: SolidExecutionContext) -> ResourcesDataFrame:
    config = context.solid_config
    filters = f'| where {config["filters"]}' if 'filters' in config else ''
    custom_projections = f', {config["custom_projections"]}' if 'custom_projections' in config else ''
コード例 #25
0
ファイル: launcher.py プロジェクト: crazy32571/dagster
 def config_type(cls):
     """Return the config schema for this launcher.

     Returns:
         dict: Maps each config key to its ``Field`` definition. ``command``
         is the only required field; every other field has a default.
     """
     return {
         'key_id': Field(
             Noneable(StringSource),
             is_required=False,
             default_value=None,
             description="the AWS access key ID to use, overriding environment vars",
         ),
         'access_key': Field(
             Noneable(StringSource),
             is_required=False,
             default_value=None,
             description="the AWS access key to use, overriding environment vars",
         ),
         'command': Field(
             Array(StringSource),
             is_required=True,
             description="what commands to run on the container",
         ),
         'entrypoint': Field(
             Array(StringSource),
             is_required=False,
             default_value=["/bin/bash", "-c"],
             description="what entrypoint the commands run from",
         ),
         'family': Field(
             StringSource,
             is_required=False,
             default_value='dagstertask',
             description="what family of tasks you want to be revising",
         ),
         'containername': Field(
             StringSource,
             is_required=False,
             default_value='dagstercontainer',
             description="what you want the docker container running the tasks to be called",
         ),
         'imagename': Field(
             StringSource,
             is_required=False,
             default_value='httpd:2.4',
             description="the URI for the docker image of the container",
         ),
         'memory': Field(
             StringSource,
             is_required=False,
             default_value='512',
             description="the memory in MB that the task will need",
         ),
         'cpu': Field(
             StringSource,
             is_required=False,
             default_value='256',
             description="the CPU in VCPU that the task will need",
         ),
         'region_name': Field(
             StringSource,
             is_required=False,
             default_value='us-east-2',
             description="which region the AWS cluster is on",
         ),
         'launch_type': Field(
             StringSource,
             is_required=False,
             default_value='FARGATE',
             description=(
                 "whether to use EC2 or FARGATE for running the task -- "
                 "currently only Fargate is supported"
             ),
         ),
         'grab_logs': Field(
             Bool,
             is_required=False,
             # A Bool field needs a boolean default. The previous value was the
             # string 'FARGATE' (the same literal as the launch_type default);
             # True keeps that value's truthiness.
             default_value=True,
             description="whether to pull down ECS logs for completed tasks",
         ),
     }
コード例 #26
0
ファイル: configs.py プロジェクト: richardwhitefoot/dagster
def define_bigquery_load_config():
    """Build the config schema for a BigQuery load.

    Returns:
        dict: Two top-level keys: ``destination`` (the required target table)
        and ``load_job_config`` (a dict of optional load-job settings, some of
        which come from ``_define_shared_fields()``).
    """
    # Field definitions shared with other configs, looked up by key below.
    sf = _define_shared_fields()

    allow_jagged_rows = Field(
        Bool, description="Allow missing trailing optional columns (CSV only).", is_required=False
    )

    allow_quoted_newlines = Field(
        Bool,
        description="Allow quoted data containing newline characters (CSV only).",
        is_required=False,
    )

    autodetect = Field(
        Bool,
        description="Automatically infer the schema from a sample of the data.",
        is_required=False,
    )

    # Destination is a required field for BQ loads
    destination = Field(
        Table,
        description="""table where results are written or None if not set. The destination setter
        accepts a str of the fully-qualified table ID in standard SQL format. The value must
        include a project ID, dataset ID, and table ID, each separated by ".". For example:
        your-project.your_dataset.your_table.
        See https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable
        """,
        is_required=True,
    )

    destination_table_description = Field(
        StringSource, description="description given to destination table.", is_required=False
    )

    destination_table_friendly_name = Field(
        StringSource, description="name given to destination table.", is_required=False
    )

    encoding = Field(
        BQEncoding, description="The character encoding of the data.", is_required=False
    )

    field_delimiter = Field(
        StringSource, description="The separator for fields in a CSV file.", is_required=False
    )

    ignore_unknown_values = Field(
        Bool,
        description="Ignore extra values not represented in the table schema.",
        is_required=False,
    )

    max_bad_records = Field(
        IntSource, description="Number of invalid rows to ignore.", is_required=False
    )

    null_marker = Field(
        StringSource, description="Represents a null value (CSV only).", is_required=False
    )

    quote_character = Field(
        StringSource,
        description="Character used to quote data sections (CSV only).",
        is_required=False,
    )

    schema = Field(
        Array(inner_type=dict), description="Schema of the destination table.", is_required=False
    )

    skip_leading_rows = Field(
        IntSource,
        description="Number of rows to skip when reading data (CSV only).",
        is_required=False,
    )

    source_format = Field(BQSourceFormat, description="File format of the data.", is_required=False)

    use_avro_logical_types = Field(
        Bool,
        description="""For loads of Avro data, governs whether Avro logical types are converted to
        their corresponding BigQuery types(e.g. TIMESTAMP) rather than raw types (e.g. INTEGER).""",
        is_required=False,
    )

    return {
        "destination": destination,
        "load_job_config": {
            "allow_jagged_rows": allow_jagged_rows,
            "allow_quoted_newlines": allow_quoted_newlines,
            "autodetect": autodetect,
            "clustering_fields": sf["clustering_fields"],
            "create_disposition": sf["create_disposition"],
            "destination_encryption_configuration": sf["destination_encryption_configuration"],
            "destination_table_description": destination_table_description,
            "destination_table_friendly_name": destination_table_friendly_name,
            "encoding": encoding,
            "field_delimiter": field_delimiter,
            "ignore_unknown_values": ignore_unknown_values,
            "max_bad_records": max_bad_records,
            "null_marker": null_marker,
            "quote_character": quote_character,
            "schema": schema,
            "schema_update_options": sf["schema_update_options"],
            "skip_leading_rows": skip_leading_rows,
            "source_format": source_format,
            "time_partitioning": sf["time_partitioning"],
            "use_avro_logical_types": use_avro_logical_types,
            "write_disposition": sf["write_disposition"],
        },
    }
コード例 #27
0
ファイル: resources.py プロジェクト: trevenrawr/dagster
    """
    return construct_secretsmanager_client(
        max_attempts=context.resource_config["max_attempts"],
        region_name=context.resource_config.get("region_name"),
        profile_name=context.resource_config.get("profile_name"),
    )


@resource(
    merge_dicts(
        SECRETSMANAGER_SESSION_CONFIG,
        {
            "secrets":
            Field(
                Array(str),
                is_required=False,
                default_value=[],
                description=(
                    "An array of AWS Secrets Manager secrets arns to fetch."),
            ),
            "secrets_tag":
            Field(
                Noneable(str),
                is_required=False,
                default_value=None,
                description=
                ("AWS Secrets Manager secrets with this tag will be fetched and made available."
                 ),
            ),
            "add_to_environment":
コード例 #28
0
ファイル: job.py プロジェクト: amarrella/dagster
 def config_type_run_launcher(cls):
     """Configuration intended to be set on the Dagster instance.

     Returns:
         dict: A mapping from config key name to the ``Field`` that defines the
         key's type, whether it is required, its default value (where one is
         given) and its human-readable description.
     """
     # NOTE: the descriptions below are assembled by implicit concatenation of
     # adjacent string literals, so every fragment that is followed by another
     # one must end with its own separating space.
     return {
         "instance_config_map": Field(
             StringSource,
             is_required=True,
             description="The ``name`` of an existing Volume to mount into the pod in order to "
             "provide a ConfigMap for the Dagster instance. This Volume should contain a "
             "``dagster.yaml`` with appropriate values for run storage, event log storage, etc.",
         ),
         "postgres_password_secret": Field(
             StringSource,
             is_required=False,
             description="The name of the Kubernetes Secret where the postgres password can be "
             "retrieved. Will be mounted and supplied as an environment variable to the Job Pod. "
             'Secret must contain the key ``"postgresql-password"`` which will be exposed in '
             "the Job environment as the environment variable ``DAGSTER_PG_PASSWORD``.",
         ),
         "dagster_home": Field(
             StringSource,
             is_required=False,
             default_value=DAGSTER_HOME_DEFAULT,
             description="The location of DAGSTER_HOME in the Job container; this is where the "
             "``dagster.yaml`` file will be mounted from the instance ConfigMap specified here. "
             "Defaults to /opt/dagster/dagster_home.",
         ),
         "env_config_maps": Field(
             Noneable(Array(StringSource)),
             is_required=False,
             description="A list of custom ConfigMapEnvSource names from which to draw "
             "environment variables (using ``envFrom``) for the Job. Default: ``[]``. See: "
             "https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#define-an-environment-variable-for-a-container",
         ),
         "env_secrets": Field(
             Noneable(Array(StringSource)),
             is_required=False,
             description="A list of custom Secret names from which to draw environment "
             "variables (using ``envFrom``) for the Job. Default: ``[]``. See: "
             "https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#configure-all-key-value-pairs-in-a-secret-as-container-environment-variables",
         ),
         "volume_mounts": Field(
             Array(
                 # Only ``name`` and ``mountPath`` are mandatory; the remaining
                 # keys are declared as optional fields.
                 Shape(
                     {
                         "name": StringSource,
                         "mountPath": StringSource,
                         "mountPropagation": Field(StringSource, is_required=False),
                         "readOnly": Field(BoolSource, is_required=False),
                         "subPath": Field(StringSource, is_required=False),
                         "subPathExpr": Field(StringSource, is_required=False),
                     }
                 )
             ),
             is_required=False,
             default_value=[],
             description="A list of volume mounts to include in the job's container. Default: ``[]``. See: "
             "https://v1-18.docs.kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#volumemount-v1-core",
         ),
         "volumes": Field(
             Array(
                 # Permissive: only ``name`` is declared here; the volume
                 # source keys are left open (see the description below).
                 Permissive(
                     {
                         "name": str,
                     }
                 )
             ),
             is_required=False,
             default_value=[],
             description="A list of volumes to include in the Job's Pod. Default: ``[]``. For the many "
             "possible volume source types that can be included, see: "
             "https://v1-18.docs.kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#volume-v1-core",
         ),
     }
コード例 #29
0
    Field(
        [str],
        is_required=False,
        description=
        "The list of environment variables names to forward to the docker container",
    ),
    "network":
    Field(
        StringSource,
        is_required=False,
        description=
        "Name of the network to which to connect the launched container at creation time",
    ),
    "networks":
    Field(
        Array(StringSource),
        is_required=False,
        description=
        "Names of the networks to which to connect the launched container at creation time",
    ),
    "container_kwargs":
    Field(
        Permissive(),
        is_required=False,
        description=
        "key-value pairs that can be passed into containers.create. See "
        "https://docker-py.readthedocs.io/en/stable/containers.html for the full list "
        "of available options.",
    ),
}
コード例 #30
0
ファイル: job.py プロジェクト: amarrella/dagster
 def config_type_pipeline_run(cls, default_image_pull_policy=None):
     """Configuration intended to be set at pipeline execution time.

     Args:
         default_image_pull_policy: Value used as the schema default of the
             ``image_pull_policy`` field. Defaults to ``None``.

     Returns:
         dict: A mapping from config key name to the ``Field`` that defines the
         key's type, whether it is required, its default value (where one is
         given) and its human-readable description.
     """
     # NOTE: the descriptions below are assembled by implicit concatenation of
     # adjacent string literals, so every fragment that is followed by another
     # one must end with its own separating space.
     return {
         "job_image": Field(
             Noneable(StringSource),
             is_required=False,
             description="Docker image to use for launched task Jobs. If the repository is not "
             "loaded from a GRPC server, then this field is required. If the repository is "
             "loaded from a GRPC server, then leave this field empty. "
             '(Ex: "mycompany.com/dagster-k8s-image:latest").',
         ),
         # NOTE(review): the description advertises "IfNotPresent", but the
         # schema default is whatever the caller passes in (``None`` unless
         # overridden) - presumably the fallback is applied elsewhere; confirm.
         "image_pull_policy": Field(
             Noneable(StringSource),
             is_required=False,
             description="Image pull policy to set on the launched task Job Pods. Defaults to "
             '"IfNotPresent".',
             default_value=default_image_pull_policy,
         ),
         "image_pull_secrets": Field(
             Noneable(Array(Shape({"name": StringSource}))),
             is_required=False,
             description="(Advanced) Specifies that Kubernetes should get the credentials from "
             "the Secrets named in this list.",
         ),
         "service_account_name": Field(
             Noneable(StringSource),
             is_required=False,
             description="(Advanced) Override the name of the Kubernetes service account under "
             "which to run the Job.",
         ),
         "env_config_maps": Field(
             Noneable(Array(StringSource)),
             is_required=False,
             description="A list of custom ConfigMapEnvSource names from which to draw "
             "environment variables (using ``envFrom``) for the Job. Default: ``[]``. See: "
             "https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#define-an-environment-variable-for-a-container",
         ),
         "env_secrets": Field(
             Noneable(Array(StringSource)),
             is_required=False,
             description="A list of custom Secret names from which to draw environment "
             "variables (using ``envFrom``) for the Job. Default: ``[]``. See: "
             "https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#configure-all-key-value-pairs-in-a-secret-as-container-environment-variables",
         ),
         "env_vars": Field(
             Noneable(Array(str)),
             is_required=False,
             description="A list of environment variables to inject into the Job. "
             "Default: ``[]``. See: "
             "https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#configure-all-key-value-pairs-in-a-secret-as-container-environment-variables",
         ),
         "volume_mounts": Field(
             Array(
                 # Only ``name`` and ``mountPath`` are mandatory; the remaining
                 # keys are declared as optional fields.
                 Shape(
                     {
                         "name": StringSource,
                         "mountPath": StringSource,
                         "mountPropagation": Field(StringSource, is_required=False),
                         "readOnly": Field(BoolSource, is_required=False),
                         "subPath": Field(StringSource, is_required=False),
                         "subPathExpr": Field(StringSource, is_required=False),
                     }
                 )
             ),
             is_required=False,
             default_value=[],
             description="A list of volume mounts to include in the job's container. Default: ``[]``. See: "
             "https://v1-18.docs.kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#volumemount-v1-core",
         ),
         "volumes": Field(
             Array(
                 # Permissive: only ``name`` is declared here; the volume
                 # source keys are left open (see the description below).
                 Permissive(
                     {
                         "name": str,
                     }
                 )
             ),
             is_required=False,
             default_value=[],
             description="A list of volumes to include in the Job's Pod. Default: ``[]``. For the many "
             "possible volume source types that can be included, see: "
             "https://v1-18.docs.kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#volume-v1-core",
         ),
     }