Esempio n. 1
0
def scaffold(module_name, pipeline_name, output_path, config, preset):
    """Create a DAG file for the specified dagster pipeline.

    The scaffold is written to ``<output_path>/dags/<pipeline_name>.py``; the
    ``dags`` directory is created when it does not exist yet.

    Args:
        module_name: Forwarded to the environment and scaffold builders.
        pipeline_name: Pipeline to scaffold; also names the generated file.
        output_path: Base directory (``~`` is expanded). Must not be None.
        config: Tuple of strings forwarded to ``construct_environment_yaml``.
        preset: Forwarded to ``construct_environment_yaml``.
    """
    # tuple_param already guarantees that config is a tuple of str.
    check.tuple_param(config, "config", of_type=str)
    check.invariant(
        output_path is not None,
        "You must specify --output-path or set AIRFLOW_HOME to use this script.",
    )

    run_config = construct_environment_yaml(preset, config, pipeline_name,
                                            module_name)
    file_contents = construct_scaffolded_file_contents(module_name,
                                                       pipeline_name,
                                                       run_config)

    # Ensure output_path/dags exists; exist_ok avoids a check-then-create race.
    dags_path = os.path.join(os.path.expanduser(output_path), "dags")
    os.makedirs(dags_path, exist_ok=True)

    dag_file = os.path.join(dags_path, pipeline_name + ".py")

    # The file is opened in binary mode, so file_contents must be bytes-like.
    with open(dag_file, "wb") as f:
        f.write(file_contents)

    # Report success only after the file has actually been written.
    click.echo("Wrote DAG scaffold to file: %s" % dag_file)
Esempio n. 2
0
def scaffold(module_name, pipeline_name, output_path, config, preset, job_name):
    """Create a DAG file for the specified dagster job.

    ``job_name`` and the older ``pipeline_name`` argument are reconciled
    through ``canonicalize_backcompat_args``; at least one must be given. The
    scaffold is written to ``<output_path>/dags/<job_name>.py``; the ``dags``
    directory is created when it does not exist yet.

    Args:
        module_name: Forwarded to the environment and scaffold builders.
        pipeline_name: Backwards-compatible alias for ``job_name``.
        output_path: Base directory (``~`` is expanded). Must not be None.
        config: Tuple of strings forwarded to ``construct_environment_yaml``.
        preset: Forwarded to ``construct_environment_yaml``.
        job_name: Job to scaffold; also names the generated file.
    """
    job_name = canonicalize_backcompat_args(
        new_val=job_name,
        new_arg="job_name",
        old_val=pipeline_name,
        old_arg="pipeline_name",
        breaking_version="future versions",
        coerce_old_to_new=lambda val: val,
    )

    check.invariant(job_name is not None, "You must specify either --job-name or --pipeline-name.")
    # tuple_param already guarantees that config is a tuple of str.
    check.tuple_param(config, "config", of_type=str)
    check.invariant(
        output_path is not None,
        "You must specify --output-path or set AIRFLOW_HOME to use this script.",
    )

    run_config = construct_environment_yaml(preset, config, job_name, module_name)
    file_contents = construct_scaffolded_file_contents(module_name, job_name, run_config)

    # Ensure output_path/dags exists; exist_ok avoids a check-then-create race.
    dags_path = os.path.join(os.path.expanduser(output_path), "dags")
    os.makedirs(dags_path, exist_ok=True)

    dag_file = os.path.join(dags_path, job_name + ".py")

    # The file is opened in binary mode, so file_contents must be bytes-like.
    with open(dag_file, "wb") as f:
        f.write(file_contents)

    # Report success only after the file has actually been written.
    click.echo("Wrote DAG scaffold to file: %s" % dag_file)
Esempio n. 3
0
    def __new__(
        cls,
        asset_key: Union[List[str], AssetKey, str],
        description: Optional[str] = None,
        metadata_entries: Optional[List[MetadataEntry]] = None,
        partition: Optional[str] = None,
        metadata: Optional[Dict[str, RawMetadataValue]] = None,
    ):
        """Validate the arguments and build the observation record.

        ``asset_key`` may be an ``AssetKey`` (used as-is), a string (run
        through ``parse_asset_key_string``), or a list/tuple of strings; every
        form other than ``AssetKey`` is wrapped in a new ``AssetKey``.
        ``metadata`` and ``metadata_entries`` are combined by
        ``normalize_metadata``.
        """
        if not isinstance(asset_key, AssetKey):
            if isinstance(asset_key, str):
                raw_key = parse_asset_key_string(asset_key)
            else:
                # Lists and tuples are validated, then passed through unchanged.
                if isinstance(asset_key, list):
                    check.list_param(asset_key, "asset_key", of_type=str)
                else:
                    check.tuple_param(asset_key, "asset_key", of_type=str)
                raw_key = asset_key
            asset_key = AssetKey(raw_key)

        metadata = check.opt_dict_param(metadata, "metadata", key_type=str)
        metadata_entries = check.opt_list_param(metadata_entries,
                                                "metadata_entries",
                                                of_type=MetadataEntry)

        checked_description = check.opt_str_param(description, "description")
        normalized_entries = cast(
            List[MetadataEntry],
            normalize_metadata(metadata, metadata_entries))
        checked_partition = check.opt_str_param(partition, "partition")

        return super(AssetObservation, cls).__new__(
            cls,
            asset_key=asset_key,
            description=checked_description,
            metadata_entries=normalized_entries,
            partition=checked_partition,
        )
Esempio n. 4
0
def canonicalize_path(
        path: Union[Tuple[str, ...], List[str], str]) -> Tuple[str, ...]:
    """Normalize ``path`` into a tuple of strings.

    A bare string becomes a one-element tuple, a tuple is validated and
    returned unchanged, and a list is validated and converted to a tuple.
    Any other type is rejected through ``check.failed``.
    """
    if isinstance(path, str):
        return (path, )

    if isinstance(path, tuple):
        check.tuple_param(path, "path", of_type=str)
        return path

    if isinstance(path, list):
        check.list_param(path, "path", of_type=str)
        return tuple(path)

    check.failed(
        "path param must be a string, list of strings, or tuple of strings"
    )
Esempio n. 5
0
 def __init__(self, storage_key, path, computation):
     """Validate and store the storage key, path and optional computation.

     ``path`` must be a tuple of strings and ``computation`` may be None.
     The dagster type created here accepts every value (its check function
     always returns True) and is named after the dot-joined ``self.path``
     (presumably a property over ``_path`` defined outside this block).
     """
     self._storage_key = check.str_param(storage_key, 'storage_key')
     self._path = check.tuple_param(path, 'path', of_type=str)
     self._computation = check.opt_inst_param(
         computation, 'computation', Computation)

     dotted_name = '.'.join(self.path)
     self._dagster_type = DagsterType(
         type_check_fn=lambda a, b: True,
         name=dotted_name,
     )
Esempio n. 6
0
def serialize_dagster_namedtuple(nt, **json_kwargs):
    """Serialize the tuple ``nt`` using the whitelisted enum and tuple maps.

    ``nt`` must be a tuple (enforced by ``check.tuple_param``); any extra
    keyword arguments are forwarded to ``_serialize_dagster_namedtuple``.
    """
    checked_nt = check.tuple_param(nt, 'nt')
    return _serialize_dagster_namedtuple(
        checked_nt,
        enum_map=_WHITELISTED_ENUM_MAP,
        tuple_map=_WHITELISTED_TUPLE_MAP,
        **json_kwargs
    )
Esempio n. 7
0
    def __init__(
        self,
        python_type: t.Union[t.Type, t.Tuple[t.Type, ...]],
        key: t.Optional[str] = None,
        name: t.Optional[str] = None,
        **kwargs,
    ):
        """Build a dagster type whose check is an isinstance test.

        ``python_type`` is either a single type or a tuple of types. A tuple
        is described as ``Union[...]`` of the member names; a single type is
        described by its ``__name__``. ``name`` defaults to that description
        and ``key`` defaults to ``name``. Remaining keyword arguments are
        forwarded to the parent initializer.
        """
        if isinstance(python_type, tuple):
            # One `type` entry per member: every element must itself be a type.
            expected_shape = tuple(type for _ in python_type)
            self.python_type = check.tuple_param(
                python_type, "python_type", of_shape=expected_shape
            )
            member_names = ", ".join(member.__name__ for member in python_type)
            self.type_str = "Union[{}]".format(member_names)
            typing_type = t.Union[python_type]  # type: ignore
        else:
            self.python_type = check.type_param(python_type, "python_type")  # type: ignore
            self.type_str = cast(str, python_type.__name__)
            typing_type = self.python_type  # type: ignore

        resolved_name = check.opt_str_param(name, "name", self.type_str)
        resolved_key = check.opt_str_param(key, "key", resolved_name)
        type_check_fn = isinstance_type_check_fn(python_type, resolved_name, self.type_str)

        super(PythonObjectDagsterType, self).__init__(
            key=resolved_key,
            name=resolved_name,
            type_check_fn=type_check_fn,
            typing_type=typing_type,
            **kwargs,
        )
Esempio n. 8
0
def test_opt_tuple_param():
    """opt_tuple_param accepts tuples and None and rejects other types."""
    assert check.opt_tuple_param((1, 2), 'something')
    assert check.opt_tuple_param(None, 'something') is None
    # A one-element tuple needs the trailing comma; `(2)` is just the int 2.
    assert check.opt_tuple_param(None, 'something', (2,)) == (2,)

    # The rejection cases must exercise opt_tuple_param itself, not
    # tuple_param, otherwise the function under test is never checked.
    for not_a_tuple in (1, [1], {1: 2}, 'kdjfkd'):
        with pytest.raises(CheckError):
            check.opt_tuple_param(not_a_tuple, 'something')
Esempio n. 9
0
    def __new__(cls, path=None):
        """Build an asset key from a string, list of strings or tuple of strings.

        A bare string becomes a one-element list. Lists are validated with
        ``check.list_param``; everything else (including the default None)
        goes through ``check.tuple_param``.
        """
        if isinstance(path, str):
            segments = [path]
        elif isinstance(path, list):
            segments = check.list_param(path, "path", of_type=str)
        else:
            segments = check.tuple_param(path, "path", of_type=str)

        return super(AssetKey, cls).__new__(cls, path=segments)
Esempio n. 10
0
    def __new__(cls, path=None):
        """Build an asset key from a validated string, list or tuple path.

        A string is passed through ``validate_asset_key_string`` and wrapped
        in a one-element list. A list or tuple of strings is type-checked and
        then passed through ``validate_structured_asset_key``.
        """
        if check.is_str(path):
            validated = [validate_asset_key_string(path)]
        else:
            if isinstance(path, list):
                checked = check.list_param(path, "path", of_type=str)
            else:
                checked = check.tuple_param(path, "path", of_type=str)
            validated = validate_structured_asset_key(checked)

        return super(AssetKey, cls).__new__(cls, path=validated)
Esempio n. 11
0
    def __new__(cls,
                path: Optional[Union[str, List[str], Tuple[str, ...]]] = None):
        """Build an asset key from a string, list of strings or tuple of strings.

        A bare string becomes a one-element list. Lists are validated with
        ``check.list_param``; everything else (including the default None)
        goes through ``check.tuple_param``.
        """
        if isinstance(path, str):
            segments = [path]
        elif isinstance(path, list):
            segments = check.list_param(path, "path", of_type=str)
        else:
            segments = check.tuple_param(path, "path", of_type=str)

        return super(AssetKey, cls).__new__(cls, path=segments)
Esempio n. 12
0
def check_info_fields(info, *fields):
    """Report whether exactly the named ``fields`` are set on ``info``.

    Args:
        info: A ``PipelineTargetInfo`` instance.
        *fields: Names of the fields expected to be set (not None).

    Returns:
        False if any of the named fields is None, otherwise True.

    Raises:
        InvalidPipelineLoadingComboError: If all named fields are set but a
            field in ``INFO_FIELDS`` that was not named is also set.
    """
    check.inst_param(info, 'info', PipelineTargetInfo)
    check.tuple_param(fields, 'fields')

    info_dict = info._asdict()
    if any(info_dict[field] is None for field in fields):
        return False

    for none_field in INFO_FIELDS.difference(set(fields)):
        if info_dict[none_field] is not None:
            # The original message ran "if" straight into the field list;
            # a separating space is added here.
            raise InvalidPipelineLoadingComboError(
                ('field: {none_field} with value {value} should not be set if '
                 '{fields} were provided').format(
                     value=repr(info_dict[none_field]),
                     none_field=none_field,
                     fields=repr(fields)))

    return True
Esempio n. 13
0
    def __new__(
        cls,
        asset_key: Union[List[str], AssetKey, str],
        description: Optional[str] = None,
        metadata_entries: Optional[List[Union[MetadataEntry,
                                              PartitionMetadataEntry]]] = None,
        partition: Optional[str] = None,
        tags: Optional[Dict[str, str]] = None,
        metadata: Optional[Dict[str, RawMetadataValue]] = None,
    ):
        """Validate the arguments and build the materialization record.

        ``asset_key`` may be an ``AssetKey`` (used as-is), a string (run
        through ``parse_asset_key_string``), or a list/tuple of strings; every
        form other than ``AssetKey`` is wrapped in a new ``AssetKey``.
        Passing a truthy ``tags`` emits an experimental-parameter warning.
        ``metadata`` and ``metadata_entries`` are combined by
        ``normalize_metadata``.
        """
        if not isinstance(asset_key, AssetKey):
            if isinstance(asset_key, str):
                raw_key = parse_asset_key_string(asset_key)
            else:
                # Lists and tuples are validated, then passed through unchanged.
                if isinstance(asset_key, list):
                    check.list_param(asset_key, "asset_key", of_type=str)
                else:
                    check.tuple_param(asset_key, "asset_key", of_type=str)
                raw_key = asset_key
            asset_key = AssetKey(raw_key)

        if tags:
            experimental_class_param_warning("tags", "AssetMaterialization")

        metadata = check.opt_dict_param(metadata, "metadata", key_type=str)
        # NOTE(review): the annotation also allows PartitionMetadataEntry, but
        # only MetadataEntry is accepted by this check - confirm which is intended.
        metadata_entries = check.opt_list_param(metadata_entries,
                                                "metadata_entries",
                                                of_type=MetadataEntry)

        checked_description = check.opt_str_param(description, "description")
        normalized_entries = normalize_metadata(metadata, metadata_entries)
        checked_partition = check.opt_str_param(partition, "partition")
        checked_tags = check.opt_dict_param(tags,
                                            "tags",
                                            key_type=str,
                                            value_type=str)

        return super(AssetMaterialization, cls).__new__(
            cls,
            asset_key=asset_key,
            description=checked_description,
            metadata_entries=normalized_entries,
            partition=checked_partition,
            tags=checked_tags,
        )
Esempio n. 14
0
    def __init__(self, *args, **kwargs):
        """Initialize the error with the wrapped Kubernetes API exception.

        Requires the keyword arguments ``k8s_api_exception`` (an Exception)
        and ``original_exc_info`` (a tuple whose first element is not None).
        Both are removed from ``kwargs`` before the remaining arguments are
        handed to the parent initializer. At least one positional argument is
        required.
        """
        api_exception = check.inst_param(
            kwargs.pop("k8s_api_exception"), "k8s_api_exception", Exception
        )
        exc_info = check.tuple_param(kwargs.pop("original_exc_info"), "original_exc_info")
        check.invariant(exc_info[0] is not None)

        first_arg, remaining_args = args[0], args[1:]
        super(DagsterK8sUnrecoverableAPIError, self).__init__(
            first_arg, *remaining_args, **kwargs
        )

        self.k8s_api_exception = check.opt_inst_param(
            api_exception, "k8s_api_exception", Exception
        )
        self.original_exc_info = exc_info
Esempio n. 15
0
    def __init__(self, storage_key, path, compute_fn, deps,
                 output_in_memory_type):
        """Validate and store the compute function, dependencies and output type.

        ``deps`` must map string keys to ``AssetDependency`` values and
        ``output_in_memory_type`` must be a type. ``storage_key`` and ``path``
        are validated here and passed on to the parent initializer.
        """
        self._compute_fn = check.callable_param(compute_fn, 'compute_fn')
        self._deps = check.dict_param(
            deps, 'deps', key_type=str, value_type=AssetDependency)
        self._output_in_memory_type = check.inst_param(
            output_in_memory_type, 'output_in_memory_type', type)

        checked_storage_key = check.str_param(storage_key, 'storage_key')
        checked_path = check.tuple_param(path, 'path', of_type=str)
        super(ComputedAsset, self).__init__(
            storage_key=checked_storage_key,
            path=checked_path,
        )
Esempio n. 16
0
    def __init__(self, *args, **kwargs):
        """Initialize the error with the wrapped user exception.

        Requires the keyword arguments ``user_exception`` (an Exception) and
        ``original_exc_info`` (a tuple whose first element is not None). Both
        are removed from ``kwargs`` before the remaining arguments are handed
        to the parent initializer. At least one positional argument is
        required.
        """
        # original_exc_info should come from a sys.exc_info() call made at the
        # callsite, inside the exception handler. That lets consuming code
        # *re-raise* the user error in its original format, for cleaner error
        # reporting without framework code in it.
        wrapped_exception = check.inst_param(
            kwargs.pop("user_exception"), "user_exception", Exception
        )
        exc_info = check.tuple_param(kwargs.pop("original_exc_info"), "original_exc_info")
        check.invariant(exc_info[0] is not None)

        first_arg, remaining_args = args[0], args[1:]
        super(DagsterUserCodeExecutionError, self).__init__(
            first_arg, *remaining_args, **kwargs
        )

        self.user_exception = check.opt_inst_param(
            wrapped_exception, "user_exception", Exception
        )
        self.original_exc_info = exc_info
Esempio n. 17
0
def backoff(
    fn,
    retry_on,
    args=None,
    kwargs=None,
    max_retries=BACKOFF_MAX_RETRIES,
    delay_generator=None,
):
    """Straightforward backoff implementation.

    Note that this doesn't implement any jitter on the delays, so probably won't be appropriate for very
    parallel situations.

    Args:
        fn (Callable): The function to wrap in a backoff/retry loop.
        retry_on (Tuple[Exception, ...]): The exception classes on which to retry. Note that we don't (yet)
            have any support for matching the exception messages.
        args (Optional[List[Any]]): Positional args to pass to the callable.
        kwargs (Optional[Dict[str, Any]]): Keyword args to pass to the callable.
        max_retries (Optional[Int]): The maximum number of times to retry a failed fn call. Set to 0 for no backoff.
            Default: BACKOFF_MAX_RETRIES
        delay_generator (Optional[Generator[float, None, None]]): Generates the successive delays between
            retry attempts. Defaults to a fresh ``backoff_delay_generator()`` for each call.

    Returns:
        Whatever ``fn`` returns on its first successful call.

    Raises:
        The last exception matching ``retry_on`` once ``max_retries`` retries have failed; exceptions not
        listed in ``retry_on`` propagate immediately.
    """
    check.callable_param(fn, "fn")
    retry_on = check.tuple_param(retry_on, "retry_on")
    args = check.opt_list_param(args, "args")
    kwargs = check.opt_dict_param(kwargs, "kwargs", key_type=str)
    check.int_param(max_retries, "max_retries")
    # A generator used as a default argument value is created once, at function definition time, and
    # then shared by every call: later calls would resume the delay sequence where earlier calls left
    # it. Build a fresh one per call instead.
    if delay_generator is None:
        delay_generator = backoff_delay_generator()
    check.generator_param(delay_generator, "delay_generator")

    retries = 0
    while True:
        try:
            return fn(*args, **kwargs)
        except retry_on:
            if retries >= max_retries:
                # Out of retries: re-raise the most recent failure with its original traceback.
                raise
            retries += 1
        time.sleep(next(delay_generator))
Esempio n. 18
0
 def __init__(self, python_type, key=None, name=None, **kwargs):
     """Build a dagster type checked by ``self.type_check_method``.

     ``python_type`` is either a single type or a tuple of types. A tuple is
     described as ``Union[...]`` of the member names; a single type is
     described by its ``__name__``. ``name`` defaults to that description and
     ``key`` defaults to ``name``. Remaining keyword arguments are forwarded
     to the parent initializer.
     """
     if isinstance(python_type, tuple):
         # One `check.type_types` entry per member of the tuple.
         per_member_types = tuple(check.type_types for _ in python_type)
         self.python_type = check.tuple_param(
             python_type, "python_type", of_type=per_member_types
         )
         member_names = ", ".join(member.__name__ for member in python_type)
         self.type_str = "Union[{}]".format(member_names)
     else:
         self.python_type = check.type_param(python_type, "python_type")
         self.type_str = python_type.__name__

     resolved_name = check.opt_str_param(name, "name", self.type_str)
     resolved_key = check.opt_str_param(key, "key", resolved_name)
     super(PythonObjectDagsterType, self).__init__(
         key=resolved_key,
         name=resolved_name,
         type_check_fn=self.type_check_method,
         **kwargs
     )
Esempio n. 19
0
    def __new__(cls, reconstructor_pointer, reconstructable_args, reconstructable_kwargs):
        """Validate the pointer, its args and its kwargs, then build the tuple.

        ``reconstructable_kwargs`` must be a tuple of tuples whose first
        element (the kwarg key) is a string.
        """
        check.inst_param(reconstructor_pointer, "reconstructor_pointer", ModuleCodePointer)
        check.tuple_param(reconstructable_args, "reconstructable_args")
        check.tuple_param(reconstructable_kwargs, "reconstructable_kwargs")

        for kwarg_entry in reconstructable_kwargs:
            check.tuple_param(kwarg_entry, "reconstructable_kwarg")
            key_is_str = check.is_str(kwarg_entry[0])
            check.invariant(key_is_str, "Bad kwarg key")

        return super(CustomPointer, cls).__new__(
            cls,
            reconstructor_pointer,
            reconstructable_args,
            reconstructable_kwargs,
        )
Esempio n. 20
0
    def __init__(self, *args, **kwargs):
        """Initialize the error raised once the Kubernetes API retry limit is hit.

        Requires the keyword arguments ``k8s_api_exception`` (an Exception),
        ``original_exc_info`` (a tuple whose first element is not None) and
        ``max_retries`` (an int). The first positional argument is passed
        through ``_add_inner_exception_for_py2`` and prefixed with the retry
        limit to form the message given to the parent initializer.
        """
        api_exception = check.inst_param(kwargs.pop("k8s_api_exception"),
                                         "k8s_api_exception", Exception)
        exc_info = check.tuple_param(kwargs.pop("original_exc_info"),
                                     "original_exc_info")
        max_retries = check.int_param(kwargs.pop("max_retries"), "max_retries")
        check.invariant(exc_info[0] is not None)

        msg = _add_inner_exception_for_py2(args[0], exc_info)
        prefix = "Retry limit of {max_retries} exceeded: ".format(
            max_retries=max_retries)
        remaining_args = args[1:]
        super(DagsterK8sAPIRetryLimitExceeded, self).__init__(
            prefix + msg, *remaining_args, **kwargs)

        self.k8s_api_exception = check.opt_inst_param(api_exception,
                                                      "k8s_api_exception",
                                                      Exception)
        self.original_exc_info = exc_info
Esempio n. 21
0
    def __init__(self, *args, **kwargs):
        """Initialize the error with the wrapped user exception.

        Requires the keyword arguments ``user_exception`` (an Exception) and
        ``original_exc_info`` (a tuple whose first element is not None). The
        first positional argument is passed through
        ``_add_inner_exception_for_py2`` to form the message given to the
        parent initializer.
        """
        # original_exc_info should come from a sys.exc_info() call made at the
        # callsite, inside the exception handler. That lets consuming code
        # *re-raise* the user error in its original format, for cleaner error
        # reporting without framework code in it.
        wrapped_exception = check.inst_param(
            kwargs.pop('user_exception'), 'user_exception', Exception
        )
        exc_info = check.tuple_param(kwargs.pop('original_exc_info'), 'original_exc_info')

        exc_type = exc_info[0]
        if exc_type is None:
            raise Exception('bad dude {}'.format(type(self)))

        msg = _add_inner_exception_for_py2(args[0], exc_info)
        remaining_args = args[1:]
        super(DagsterUserCodeExecutionError, self).__init__(msg, *remaining_args, **kwargs)

        self.user_exception = check.opt_inst_param(
            wrapped_exception, 'user_exception', Exception
        )
        self.original_exc_info = exc_info
Esempio n. 22
0
def serialize_dagster_namedtuple(nt: NamedTuple, **json_kwargs) -> str:
    """Serialize a whitelisted named tuple to a JSON-encoded string.

    ``nt`` must be a tuple (enforced by ``check.tuple_param``); any extra
    keyword arguments are forwarded to ``_serialize_dagster_namedtuple``.
    """
    check.tuple_param(nt, "nt")
    serialized = _serialize_dagster_namedtuple(
        nt,
        whitelist_map=_WHITELIST_MAP,
        **json_kwargs,
    )
    return serialized
Esempio n. 23
0
 def __init__(self, storage_key, path):
     """Validate and store the storage key and path.

     ``path`` must be a tuple of strings. The dagster type created here
     accepts every value (its check function always returns True) and is
     named after the dot-joined ``self.path`` (presumably a property over
     ``_path`` defined outside this block).
     """
     self._storage_key = check.str_param(storage_key, 'storage_key')
     self._path = check.tuple_param(path, 'path', of_type=str)

     dotted_name = '.'.join(self.path)
     self._dagster_type = DagsterType(
         type_check_fn=lambda a, b: True,
         name=dotted_name,
     )
Esempio n. 24
0
def test_tuple_param():
    """Exercise check.tuple_param with and without the of_type constraint."""
    assert check.tuple_param((1, 2), "something")

    # Anything that is not a tuple must be rejected.
    for not_a_tuple in (None, 1, [1], {1: 2}, "kdjfkd"):
        with pytest.raises(CheckError):
            check.tuple_param(not_a_tuple, "something")

    # A single type applies to every element.
    assert check.tuple_param((3, 4), "something", of_type=int)
    assert check.tuple_param(("foo", "bar"), "something", of_type=str)

    # A tuple of types is matched against the elements position by position.
    assert check.tuple_param((3, 4), "something", of_type=(int, int))
    assert check.tuple_param((3, 4), "something", of_type=(int, int))
    assert check.tuple_param((3, "bar"), "something", of_type=(int, str))

    # Length mismatches and per-position type mismatches are rejected.
    rejected = [
        ((3, 4, 5), (int, int)),
        ((3, 4), (int, int, int)),
        ((3, 4), (int, str)),
        ((3, 4), (str, str)),
    ]
    for value, types in rejected:
        with pytest.raises(CheckError):
            check.tuple_param(value, "something", of_type=types)
Esempio n. 25
0
def execute_cli(
    executable: str,
    command: Tuple[str, ...],
    flags_dict: Dict[str, Any],
    log: Any,
    warn_error: bool,
    ignore_handled_error: bool,
) -> Dict[str, Any]:
    """Run a dbt CLI command in a subprocess and collect its JSON logs.

    The argument list is ``executable --log-format json [--warn-error]
    <command...>`` followed by one ``--<flag>`` per truthy entry of
    ``flags_dict``. Stdout and stderr are captured together; every output line
    is logged through ``log.info`` and lines that parse as JSON are collected.

    Returns:
        A dict with the keys ``command`` (the space-joined argument list),
        ``return_code``, ``logs``, ``raw_output`` and ``summary``.

    Raises:
        DagsterDbtCliFatalRuntimeError: If the process exits with code 2.
        DagsterDbtCliHandledRuntimeError: If the process exits with code 1 and
            ``ignore_handled_error`` is False.
    """
    check.str_param(executable, "executable")
    check.tuple_param(command, "command", of_type=str)
    check.dict_param(flags_dict, "flags_dict", key_type=str)
    check.bool_param(warn_error, "warn_error")
    check.bool_param(ignore_handled_error, "ignore_handled_error")

    # Assemble the argument list: executable, JSON logging, optional
    # --warn-error, then the dbt command itself.
    argv = [executable, "--log-format", "json"]
    if warn_error:
        argv.append("--warn-error")
    argv.extend(command)

    for flag, value in flags_dict.items():
        if not value:
            # Falsy values (None, False, empty containers, ...) are skipped.
            continue

        argv.append(f"--{flag}")

        if isinstance(value, bool):
            # A True bool flag needs no value; its presence is enough.
            pass
        elif isinstance(value, list):
            check.list_param(value, f"config.{flag}", of_type=str)
            argv.extend(value)
        elif isinstance(value, dict):
            argv.append(json.dumps(value))
        else:
            argv.append(str(value))

    # Execute the dbt CLI command in a subprocess.
    rendered_command = " ".join(argv)
    log.info(f"Executing command: $ {rendered_command}")

    try:
        proc_out = subprocess.check_output(args=argv,
                                           stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as exc:
        # A non-zero exit still carries the captured output.
        return_code, proc_out = exc.returncode, exc.output
    else:
        return_code = 0

    # Parse the JSON logs from the dbt process; non-JSON lines are only logged.
    logs = []
    for raw_line in proc_out.strip().split(b"\n"):
        text = raw_line.decode()
        log.info(text.rstrip())
        try:
            logs.append(json.loads(text))
        except json.JSONDecodeError:
            pass

    log.info("dbt exited with return code {return_code}".format(
        return_code=return_code))

    raw_output = proc_out.decode()

    if return_code == 2:
        raise DagsterDbtCliFatalRuntimeError(logs=logs, raw_output=raw_output)

    if return_code == 1 and not ignore_handled_error:
        raise DagsterDbtCliHandledRuntimeError(logs=logs,
                                               raw_output=raw_output)

    return {
        "command": rendered_command,
        "return_code": return_code,
        "logs": logs,
        "raw_output": raw_output,
        "summary": extract_summary(logs),
    }
Esempio n. 26
0
def execute_cli(
    executable: str,
    command: Tuple[str, ...],
    flags_dict: Dict[str, Any],
    log: Any,
    warn_error: bool,
    ignore_handled_error: bool,
) -> Dict[str, Any]:
    """Executes a command on the dbt CLI in a subprocess, streaming its output.

    The argument list is ``executable --log-format json [--warn-error]
    <command...>`` followed by one ``--<flag>`` per truthy entry of
    ``flags_dict``. Stdout is read line by line while the process runs. Lines
    that parse as JSON are collected and logged at the level named by their
    ``levelname`` field when ``log`` has a matching attribute; all other lines
    are logged through ``log.info``.

    Returns:
        A dict with the keys ``command`` (the space-joined argument list),
        ``return_code``, ``logs``, ``raw_output`` and ``summary``.

    Raises:
        DagsterDbtCliFatalRuntimeError: If the process exits with code 2.
        DagsterDbtCliHandledRuntimeError: If the process exits with code 1 and
            ``ignore_handled_error`` is False.
    """
    check.str_param(executable, "executable")
    check.tuple_param(command, "command", of_type=str)
    check.dict_param(flags_dict, "flags_dict", key_type=str)
    check.bool_param(warn_error, "warn_error")
    check.bool_param(ignore_handled_error, "ignore_handled_error")

    # Format the dbt CLI flags in the command.
    warn_error_args = ["--warn-error"] if warn_error else []
    command_list = [executable, "--log-format", "json", *warn_error_args, *command]

    for flag, value in flags_dict.items():
        if not value:
            continue

        command_list.append(f"--{flag}")

        if isinstance(value, bool):
            # If a bool flag (and is True), the presence of the flag itself is enough.
            continue

        if isinstance(value, list):
            check.list_param(value, f"config.{flag}", of_type=str)
            command_list += value
            continue

        if isinstance(value, dict):
            command_list.append(json.dumps(value))
            continue

        command_list.append(str(value))

    # Execute the dbt CLI command in a subprocess.
    command_str = " ".join(command_list)
    log.info(f"Executing command: {command_str}")

    logs = []
    output = []

    # Using Popen as a context manager closes the stdout pipe and waits for
    # the process on exit, even if handling a line raises.
    with subprocess.Popen(command_list, stdout=subprocess.PIPE) as process:
        for raw_line in process.stdout:
            line = raw_line.decode("utf-8")
            output.append(line)
            try:
                json_line = json.loads(line)
            except json.JSONDecodeError:
                log.info(line.rstrip())
            else:
                logs.append(json_line)
                level = json_line.get("levelname", "").lower()
                if hasattr(log, level):
                    getattr(log, level)(json_line.get("message", ""))
                else:
                    log.info(line.rstrip())

    return_code = process.returncode

    log.info("dbt exited with return code {return_code}".format(
        return_code=return_code))

    # Each captured line still carries its own trailing newline, so the lines
    # are concatenated directly; joining with "\n" would double every line break.
    raw_output = "".join(output)

    if return_code == 2:
        raise DagsterDbtCliFatalRuntimeError(logs=logs, raw_output=raw_output)

    if return_code == 1 and not ignore_handled_error:
        raise DagsterDbtCliHandledRuntimeError(logs=logs,
                                               raw_output=raw_output)

    return {
        "command": command_str,
        "return_code": return_code,
        "logs": logs,
        "raw_output": raw_output,
        "summary": extract_summary(logs),
    }
Esempio n. 27
0
def test_tuple_param():
    """Exercise check.tuple_param with and without the of_type constraint."""
    assert check.tuple_param((1, 2), 'something')

    # Anything that is not a tuple must be rejected.
    for not_a_tuple in (None, 1, [1], {1: 2}, 'kdjfkd'):
        with pytest.raises(CheckError):
            check.tuple_param(not_a_tuple, 'something')

    # A single type applies to every element.
    assert check.tuple_param((3, 4), 'something', of_type=int)
    assert check.tuple_param(('foo', 'bar'), 'something', of_type=str)

    # A tuple of types is matched against the elements position by position.
    assert check.tuple_param((3, 4), 'something', of_type=(int, int))
    assert check.tuple_param((3, 4), 'something', of_type=(int, int))
    assert check.tuple_param((3, 'bar'), 'something', of_type=(int, str))

    # Length mismatches and per-position type mismatches are rejected.
    rejected = [
        ((3, 4, 5), (int, int)),
        ((3, 4), (int, int, int)),
        ((3, 4), (int, str)),
        ((3, 4), (str, str)),
    ]
    for value, types in rejected:
        with pytest.raises(CheckError):
            check.tuple_param(value, 'something', of_type=types)
Esempio n. 28
0
def serialize_dagster_namedtuple(nt, **json_kwargs):
    """Serialize the tuple ``nt`` using the module's whitelist map.

    ``nt`` must be a tuple (enforced by ``check.tuple_param``); any extra
    keyword arguments are forwarded to ``_serialize_dagster_namedtuple``.
    """
    checked_nt = check.tuple_param(nt, "nt")
    return _serialize_dagster_namedtuple(
        checked_nt,
        whitelist_map=_WHITELIST_MAP,
        **json_kwargs
    )