def scaffold(module_name, pipeline_name, output_path, config, preset):
    """Create a DAG file for the specified dagster pipeline.

    The scaffold is written to ``<output_path>/dags/<pipeline_name>.py``;
    ``~`` in ``output_path`` is expanded and the ``dags`` directory is
    created when it does not exist yet.

    Args:
        module_name: Forwarded to ``construct_environment_yaml`` and
            ``construct_scaffolded_file_contents``.
        pipeline_name: Pipeline to scaffold; also the base name of the
            generated file.
        output_path: Base directory for the ``dags`` folder. Must not be None.
        config: Tuple of strings forwarded to ``construct_environment_yaml``.
        preset: Forwarded to ``construct_environment_yaml``.
    """
    check.tuple_param(config, "config", of_type=str)
    check.invariant(isinstance(config, tuple))
    check.invariant(
        output_path is not None,
        "You must specify --output-path or set AIRFLOW_HOME to use this script.",
    )

    run_config = construct_environment_yaml(preset, config, pipeline_name, module_name)
    file_contents = construct_scaffolded_file_contents(module_name, pipeline_name, run_config)

    # Ensure output_path/dags exists
    dags_path = os.path.join(os.path.expanduser(output_path), "dags")
    if not os.path.isdir(dags_path):
        os.makedirs(dags_path)

    dag_file = os.path.join(dags_path, pipeline_name + ".py")
    with open(dag_file, "wb") as f:
        f.write(file_contents)

    # Report success only once the file is actually on disk.
    click.echo("Wrote DAG scaffold to file: %s" % dag_file)
def scaffold(module_name, pipeline_name, output_path, config, preset, job_name):
    """Create a DAG file for the specified dagster job.

    ``job_name`` and the older ``pipeline_name`` argument are reconciled
    through ``canonicalize_backcompat_args``; the resulting name must not be
    None. The scaffold is written to ``<output_path>/dags/<job_name>.py``;
    ``~`` in ``output_path`` is expanded and the ``dags`` directory is created
    when it does not exist yet.

    Args:
        module_name: Forwarded to ``construct_environment_yaml`` and
            ``construct_scaffolded_file_contents``.
        pipeline_name: Old spelling of ``job_name``.
        output_path: Base directory for the ``dags`` folder. Must not be None.
        config: Tuple of strings forwarded to ``construct_environment_yaml``.
        preset: Forwarded to ``construct_environment_yaml``.
        job_name: Job to scaffold; also the base name of the generated file.
    """
    job_name = canonicalize_backcompat_args(
        new_val=job_name,
        new_arg="job_name",
        old_val=pipeline_name,
        old_arg="pipeline_name",
        breaking_version="future versions",
        coerce_old_to_new=lambda val: val,
    )
    check.invariant(job_name is not None, "You must specify either --job-name or --pipeline-name.")
    check.tuple_param(config, "config", of_type=str)
    check.invariant(isinstance(config, tuple))
    check.invariant(
        output_path is not None,
        "You must specify --output-path or set AIRFLOW_HOME to use this script.",
    )

    run_config = construct_environment_yaml(preset, config, job_name, module_name)
    file_contents = construct_scaffolded_file_contents(module_name, job_name, run_config)

    # Ensure output_path/dags exists
    dags_path = os.path.join(os.path.expanduser(output_path), "dags")
    if not os.path.isdir(dags_path):
        os.makedirs(dags_path)

    dag_file = os.path.join(dags_path, job_name + ".py")
    with open(dag_file, "wb") as f:
        f.write(file_contents)

    # Report success only once the file is actually on disk.
    click.echo("Wrote DAG scaffold to file: %s" % dag_file)
def __new__(
    cls,
    asset_key: Union[List[str], AssetKey, str],
    description: Optional[str] = None,
    metadata_entries: Optional[List[MetadataEntry]] = None,
    partition: Optional[str] = None,
    metadata: Optional[Dict[str, RawMetadataValue]] = None,
):
    """Build an AssetObservation, coercing ``asset_key`` to an ``AssetKey``.

    ``asset_key`` may be an ``AssetKey``, a string (run through
    ``parse_asset_key_string``), a list of strings, or - via the fallback
    branch - a tuple of strings. ``metadata`` and ``metadata_entries`` are
    combined by ``normalize_metadata``.
    """
    if isinstance(asset_key, AssetKey):
        check.inst_param(asset_key, "asset_key", AssetKey)
    else:
        if isinstance(asset_key, str):
            key_source = parse_asset_key_string(asset_key)
        else:
            # Lists get the list check; anything else must pass the tuple check.
            ensure_strs = check.list_param if isinstance(asset_key, list) else check.tuple_param
            ensure_strs(asset_key, "asset_key", of_type=str)
            key_source = asset_key
        asset_key = AssetKey(key_source)

    metadata = check.opt_dict_param(metadata, "metadata", key_type=str)
    metadata_entries = check.opt_list_param(
        metadata_entries, "metadata_entries", of_type=MetadataEntry
    )

    checked_description = check.opt_str_param(description, "description")
    normalized_entries = cast(
        List[MetadataEntry], normalize_metadata(metadata, metadata_entries)
    )
    checked_partition = check.opt_str_param(partition, "partition")

    return super(AssetObservation, cls).__new__(
        cls,
        asset_key=asset_key,
        description=checked_description,
        metadata_entries=normalized_entries,
        partition=checked_partition,
    )
def canonicalize_path(path: Union[Tuple[str, ...], List[str], str]) -> Tuple[str, ...]:
    """Normalize ``path`` into a tuple of strings.

    A single string becomes a one-element tuple, a tuple is checked and
    returned as-is, and a list is checked and converted to a tuple. Any other
    input is reported through ``check.failed``.
    """
    if isinstance(path, str):
        return (path,)

    if isinstance(path, tuple):
        check.tuple_param(path, "path", of_type=str)
        return path

    if isinstance(path, list):
        check.list_param(path, "path", of_type=str)
        return tuple(path)

    check.failed("path param must be a string, list of strings, or tuple of strings")
def __init__(self, storage_key, path, computation):
    """Store the checked storage key, path and optional computation.

    Also builds a ``DagsterType`` whose type check always passes and whose
    name is the dot-joined ``self.path``.
    """
    self._storage_key = check.str_param(storage_key, 'storage_key')
    self._path = check.tuple_param(path, 'path', of_type=str)
    self._computation = check.opt_inst_param(computation, 'computation', Computation)

    dotted_name = '.'.join(self.path)
    self._dagster_type = DagsterType(
        type_check_fn=lambda _first, _second: True,
        name=dotted_name,
    )
def serialize_dagster_namedtuple(nt, **json_kwargs):
    """Check that ``nt`` is a tuple and serialize it.

    Delegates to ``_serialize_dagster_namedtuple`` with the module's
    whitelisted enum and tuple maps; ``json_kwargs`` are passed through.
    """
    checked_nt = check.tuple_param(nt, 'nt')
    return _serialize_dagster_namedtuple(
        checked_nt,
        enum_map=_WHITELISTED_ENUM_MAP,
        tuple_map=_WHITELISTED_TUPLE_MAP,
        **json_kwargs
    )
def __init__(
    self,
    python_type: t.Union[t.Type, t.Tuple[t.Type, ...]],
    key: t.Optional[str] = None,
    name: t.Optional[str] = None,
    **kwargs,
):
    """Set up a dagster type backed by one Python type or a tuple of them.

    For a tuple, ``type_str`` is rendered as ``Union[A, B, ...]`` and the
    typing type is ``t.Union`` over the tuple; for a single type, its
    ``__name__`` is used. ``name`` defaults to ``type_str`` and ``key``
    defaults to ``name``.
    """
    if not isinstance(python_type, tuple):
        self.python_type = check.type_param(python_type, "python_type")  # type: ignore
        self.type_str = cast(str, python_type.__name__)
        typing_type = self.python_type  # type: ignore
    else:
        # One `type` slot per member: every element of the tuple must be a type.
        expected_shape = (type,) * len(python_type)
        self.python_type = check.tuple_param(
            python_type, "python_type", of_shape=expected_shape
        )
        member_names = [member.__name__ for member in python_type]
        self.type_str = "Union[{}]".format(", ".join(member_names))
        typing_type = t.Union[python_type]  # type: ignore

    name = check.opt_str_param(name, "name", self.type_str)
    key = check.opt_str_param(key, "key", name)

    type_check = isinstance_type_check_fn(python_type, name, self.type_str)
    super(PythonObjectDagsterType, self).__init__(
        key=key,
        name=name,
        type_check_fn=type_check,
        typing_type=typing_type,
        **kwargs,
    )
def test_opt_tuple_param():
    """Exercise ``check.opt_tuple_param`` on tuples, None, defaults and bad input."""
    assert check.opt_tuple_param((1, 2), 'something')
    assert check.opt_tuple_param(None, 'something') is None
    # `(2,)` is a real one-element tuple; `(2)` would just be the int 2.
    assert check.opt_tuple_param(None, 'something', (2,)) == (2,)

    # Non-None values that are not tuples must be rejected by the *opt* variant.
    for not_a_tuple in (1, [1], {1: 2}, 'kdjfkd'):
        with pytest.raises(CheckError):
            check.opt_tuple_param(not_a_tuple, 'something')
def __new__(cls, path=None):
    """Create an AssetKey from a string, a list of strings or a tuple of strings.

    A bare string is wrapped in a one-element list. Lists are validated with
    ``check.list_param``; every other value (including the ``None`` default)
    goes through ``check.tuple_param``.
    """
    if isinstance(path, str):
        return super(AssetKey, cls).__new__(cls, path=[path])

    validator = check.list_param if isinstance(path, list) else check.tuple_param
    checked_path = validator(path, "path", of_type=str)
    return super(AssetKey, cls).__new__(cls, path=checked_path)
def __new__(cls, path=None):
    """Create an AssetKey from a validated string, list or tuple of strings.

    A string is passed through ``validate_asset_key_string`` and wrapped in a
    list. Lists and all remaining values are type-checked (list check or tuple
    check respectively) and then passed through
    ``validate_structured_asset_key``.
    """
    if check.is_str(path):
        components = [validate_asset_key_string(path)]
    else:
        param_check = check.list_param if isinstance(path, list) else check.tuple_param
        components = validate_structured_asset_key(param_check(path, "path", of_type=str))

    return super(AssetKey, cls).__new__(cls, path=components)
def __new__(cls, path: Optional[Union[str, List[str], Tuple[str, ...]]] = None):
    """Create an AssetKey from a string, a list of strings or a tuple of strings.

    A bare string becomes a one-element list; a list is checked with
    ``check.list_param``; anything else is handed to ``check.tuple_param``.
    """
    if isinstance(path, str):
        key_parts = [path]
    else:
        is_list = isinstance(path, list)
        key_parts = (
            check.list_param(path, "path", of_type=str)
            if is_list
            else check.tuple_param(path, "path", of_type=str)
        )

    return super(AssetKey, cls).__new__(cls, path=key_parts)
def check_info_fields(info, *fields):
    """Tell whether ``info`` has exactly the given ``fields`` populated.

    Args:
        info: A ``PipelineTargetInfo``.
        *fields: Names of the fields expected to be set.

    Returns:
        False as soon as one of ``fields`` is None on ``info``; True when all
        of them are set and every other name in ``INFO_FIELDS`` is None.

    Raises:
        InvalidPipelineLoadingComboError: all of ``fields`` are set but some
            other field in ``INFO_FIELDS`` is set as well.
    """
    check.inst_param(info, 'info', PipelineTargetInfo)
    check.tuple_param(fields, 'fields')

    info_dict = info._asdict()
    if any(info_dict[field] is None for field in fields):
        return False

    for none_field in INFO_FIELDS.difference(set(fields)):
        if info_dict[none_field] is not None:
            # The trailing space after "if" keeps the two literals from
            # running together in the rendered message.
            raise InvalidPipelineLoadingComboError(
                (
                    'field: {none_field} with value {value} should not be set if '
                    '{fields} were provided'
                ).format(
                    value=repr(info_dict[none_field]),
                    none_field=none_field,
                    fields=repr(fields),
                )
            )

    return True
def __new__(
    cls,
    asset_key: Union[List[str], AssetKey, str],
    description: Optional[str] = None,
    metadata_entries: Optional[List[Union[MetadataEntry, PartitionMetadataEntry]]] = None,
    partition: Optional[str] = None,
    tags: Optional[Dict[str, str]] = None,
    metadata: Optional[Dict[str, RawMetadataValue]] = None,
):
    """Build an AssetMaterialization, coercing ``asset_key`` to an ``AssetKey``.

    ``asset_key`` may be an ``AssetKey``, a string (run through
    ``parse_asset_key_string``), a list of strings, or - via the fallback
    branch - a tuple of strings. Passing a truthy ``tags`` emits the
    experimental-parameter warning. ``metadata`` and ``metadata_entries`` are
    combined by ``normalize_metadata``.
    """
    if isinstance(asset_key, AssetKey):
        check.inst_param(asset_key, "asset_key", AssetKey)
    else:
        if isinstance(asset_key, str):
            key_source = parse_asset_key_string(asset_key)
        else:
            # Lists get the list check; anything else must pass the tuple check.
            ensure_strs = check.list_param if isinstance(asset_key, list) else check.tuple_param
            ensure_strs(asset_key, "asset_key", of_type=str)
            key_source = asset_key
        asset_key = AssetKey(key_source)

    if tags:
        experimental_class_param_warning("tags", "AssetMaterialization")

    metadata = check.opt_dict_param(metadata, "metadata", key_type=str)
    # NOTE(review): the annotation also admits PartitionMetadataEntry, but this
    # check is given only MetadataEntry - confirm which one is intended.
    metadata_entries = check.opt_list_param(
        metadata_entries, "metadata_entries", of_type=MetadataEntry
    )

    checked_description = check.opt_str_param(description, "description")
    normalized_entries = normalize_metadata(metadata, metadata_entries)
    checked_partition = check.opt_str_param(partition, "partition")
    checked_tags = check.opt_dict_param(tags, "tags", key_type=str, value_type=str)

    return super(AssetMaterialization, cls).__new__(
        cls,
        asset_key=asset_key,
        description=checked_description,
        metadata_entries=normalized_entries,
        partition=checked_partition,
        tags=checked_tags,
    )
def __init__(self, *args, **kwargs):
    """Initialize the error from a message plus two required keyword arguments.

    ``k8s_api_exception`` and ``original_exc_info`` are popped from
    ``kwargs`` (both must be present); the first element of
    ``original_exc_info`` must not be None. The remaining positional and
    keyword arguments go to the parent constructor.
    """
    raw_api_exception = kwargs.pop("k8s_api_exception")
    k8s_api_exception = check.inst_param(raw_api_exception, "k8s_api_exception", Exception)

    raw_exc_info = kwargs.pop("original_exc_info")
    original_exc_info = check.tuple_param(raw_exc_info, "original_exc_info")
    check.invariant(original_exc_info[0] is not None)

    message = args[0]
    extra_args = args[1:]
    super(DagsterK8sUnrecoverableAPIError, self).__init__(message, *extra_args, **kwargs)

    self.k8s_api_exception = check.opt_inst_param(
        k8s_api_exception, "k8s_api_exception", Exception
    )
    self.original_exc_info = original_exc_info
def __init__(self, storage_key, path, compute_fn, deps, output_in_memory_type):
    """Store the compute function, dependencies and in-memory output type.

    ``deps`` must map string keys to ``AssetDependency`` values and
    ``output_in_memory_type`` must be a ``type``. The checked ``storage_key``
    and ``path`` are handed to the parent constructor.
    """
    self._compute_fn = check.callable_param(compute_fn, 'compute_fn')
    self._deps = check.dict_param(deps, 'deps', key_type=str, value_type=AssetDependency)
    self._output_in_memory_type = check.inst_param(
        output_in_memory_type, 'output_in_memory_type', type
    )

    checked_key = check.str_param(storage_key, 'storage_key')
    checked_path = check.tuple_param(path, 'path', of_type=str)
    super(ComputedAsset, self).__init__(storage_key=checked_key, path=checked_path)
def __init__(self, *args, **kwargs):
    """Initialize the error from a message plus two required keyword arguments.

    ``user_exception`` and ``original_exc_info`` are popped from ``kwargs``
    (both must be present); the first element of ``original_exc_info`` must
    not be None. The remaining positional and keyword arguments go to the
    parent constructor.
    """
    # original_exc_info should come from a sys.exc_info() call made at the
    # call site, inside the exception handler. That lets consuming code
    # *re-raise* the user error in its original form, for cleaner error
    # reporting without framework frames in it.
    raw_user_exception = kwargs.pop("user_exception")
    user_exception = check.inst_param(raw_user_exception, "user_exception", Exception)

    raw_exc_info = kwargs.pop("original_exc_info")
    original_exc_info = check.tuple_param(raw_exc_info, "original_exc_info")
    check.invariant(original_exc_info[0] is not None)

    message = args[0]
    extra_args = args[1:]
    super(DagsterUserCodeExecutionError, self).__init__(message, *extra_args, **kwargs)

    self.user_exception = check.opt_inst_param(user_exception, "user_exception", Exception)
    self.original_exc_info = original_exc_info
def backoff(
    fn,
    retry_on,
    args=None,
    kwargs=None,
    max_retries=BACKOFF_MAX_RETRIES,
    delay_generator=None,
):
    """Straightforward backoff implementation.

    Note that this doesn't implement any jitter on the delays, so probably won't be appropriate for
    very parallel situations.

    Args:
        fn (Callable): The function to wrap in a backoff/retry loop.
        retry_on (Tuple[Exception, ...]): The exception classes on which to retry. Note that we
            don't (yet) have any support for matching the exception messages.
        args (Optional[List[Any]]): Positional args to pass to the callable.
        kwargs (Optional[Dict[str, Any]]): Keyword args to pass to the callable.
        max_retries (Optional[Int]): The maximum number of times to retry a failed fn call. Set to
            0 for no backoff. Default: ``BACKOFF_MAX_RETRIES``.
        delay_generator (Optional[Generator[float, None, None]]): Generates the successive delays
            between retry attempts. When omitted, a fresh ``backoff_delay_generator()`` is created
            for this call.

    Returns:
        Whatever ``fn`` returns on its first successful call.

    Raises:
        The last exception matching ``retry_on`` once ``max_retries`` retries have failed; any
        exception not matching ``retry_on`` propagates immediately.
    """
    check.callable_param(fn, "fn")
    retry_on = check.tuple_param(retry_on, "retry_on")
    args = check.opt_list_param(args, "args")
    kwargs = check.opt_dict_param(kwargs, "kwargs", key_type=str)
    check.int_param(max_retries, "max_retries")
    # A generator used as a default argument would be created once, at function
    # definition time, and then shared (and advanced) by every call. Build it
    # per call so each backoff sequence starts from the first delay.
    if delay_generator is None:
        delay_generator = backoff_delay_generator()
    check.generator_param(delay_generator, "delay_generator")

    retries = 0
    while True:
        try:
            return fn(*args, **kwargs)
        except retry_on:
            if retries >= max_retries:
                raise
            retries += 1
        # Sleep outside the handler so a failure to produce a delay is not
        # mistaken for a retryable error.
        time.sleep(next(delay_generator))
def __init__(self, python_type, key=None, name=None, **kwargs):
    """Set up a dagster type backed by one Python type or a tuple of them.

    For a tuple, ``type_str`` is rendered as ``Union[A, B, ...]``; for a
    single type, its ``__name__`` is used. ``name`` defaults to ``type_str``
    and ``key`` defaults to ``name``. The type check is delegated to
    ``self.type_check_method``.
    """
    if not isinstance(python_type, tuple):
        self.python_type = check.type_param(python_type, "python_type")
        self.type_str = python_type.__name__
    else:
        # One `check.type_types` slot per member of the tuple.
        per_member_types = tuple(check.type_types for _ in python_type)
        self.python_type = check.tuple_param(
            python_type, "python_type", of_type=per_member_types
        )
        member_names = [member.__name__ for member in python_type]
        self.type_str = "Union[{}]".format(", ".join(member_names))

    name = check.opt_str_param(name, "name", self.type_str)
    key = check.opt_str_param(key, "key", name)

    super(PythonObjectDagsterType, self).__init__(
        key=key,
        name=name,
        type_check_fn=self.type_check_method,
        **kwargs
    )
def __new__(cls, reconstructor_pointer, reconstructable_args, reconstructable_kwargs):
    """Validate the pointer, args and kwargs, then build the CustomPointer.

    ``reconstructor_pointer`` must be a ``ModuleCodePointer``. Both
    ``reconstructable_args`` and ``reconstructable_kwargs`` must be tuples,
    and each entry of ``reconstructable_kwargs`` must itself be a tuple whose
    first element is a string.
    """
    check.inst_param(reconstructor_pointer, "reconstructor_pointer", ModuleCodePointer)
    check.tuple_param(reconstructable_args, "reconstructable_args")
    check.tuple_param(reconstructable_kwargs, "reconstructable_kwargs")

    for kwarg_entry in reconstructable_kwargs:
        check.tuple_param(kwarg_entry, "reconstructable_kwarg")
        key_is_str = check.is_str(kwarg_entry[0])
        check.invariant(key_is_str, "Bad kwarg key")

    return super(CustomPointer, cls).__new__(
        cls,
        reconstructor_pointer,
        reconstructable_args,
        reconstructable_kwargs,
    )
def __init__(self, *args, **kwargs):
    """Initialize the error with a retry-limit prefix on the message.

    ``k8s_api_exception``, ``original_exc_info`` and ``max_retries`` are
    popped from ``kwargs`` (all three must be present); the first element of
    ``original_exc_info`` must not be None. The first positional argument is
    passed through ``_add_inner_exception_for_py2`` and prefixed with the
    retry limit before reaching the parent constructor.
    """
    raw_api_exception = kwargs.pop("k8s_api_exception")
    k8s_api_exception = check.inst_param(raw_api_exception, "k8s_api_exception", Exception)

    raw_exc_info = kwargs.pop("original_exc_info")
    original_exc_info = check.tuple_param(raw_exc_info, "original_exc_info")

    max_retries = check.int_param(kwargs.pop("max_retries"), "max_retries")
    check.invariant(original_exc_info[0] is not None)

    msg = _add_inner_exception_for_py2(args[0], original_exc_info)
    prefix = "Retry limit of {max_retries} exceeded: ".format(max_retries=max_retries)
    extra_args = args[1:]
    super(DagsterK8sAPIRetryLimitExceeded, self).__init__(prefix + msg, *extra_args, **kwargs)

    self.k8s_api_exception = check.opt_inst_param(
        k8s_api_exception, "k8s_api_exception", Exception
    )
    self.original_exc_info = original_exc_info
def __init__(self, *args, **kwargs):
    """Initialize the error from a message plus two required keyword arguments.

    ``user_exception`` and ``original_exc_info`` are popped from ``kwargs``
    (both must be present). A plain ``Exception`` is raised when the first
    element of ``original_exc_info`` is None. The first positional argument
    is passed through ``_add_inner_exception_for_py2`` before reaching the
    parent constructor.
    """
    # original_exc_info should come from a sys.exc_info() call made at the
    # call site, inside the exception handler. That lets consuming code
    # *re-raise* the user error in its original form, for cleaner error
    # reporting without framework frames in it.
    raw_user_exception = kwargs.pop('user_exception')
    user_exception = check.inst_param(raw_user_exception, 'user_exception', Exception)

    raw_exc_info = kwargs.pop('original_exc_info')
    original_exc_info = check.tuple_param(raw_exc_info, 'original_exc_info')

    exc_type = original_exc_info[0]
    if exc_type is None:
        raise Exception('bad dude {}'.format(type(self)))

    msg = _add_inner_exception_for_py2(args[0], original_exc_info)
    extra_args = args[1:]
    super(DagsterUserCodeExecutionError, self).__init__(msg, *extra_args, **kwargs)

    self.user_exception = check.opt_inst_param(user_exception, 'user_exception', Exception)
    self.original_exc_info = original_exc_info
def serialize_dagster_namedtuple(nt: NamedTuple, **json_kwargs) -> str:
    """Serialize a whitelisted named tuple to a json encoded string.

    ``nt`` is first checked to be a tuple; ``json_kwargs`` are passed through
    to ``_serialize_dagster_namedtuple`` together with the module whitelist.
    """
    check.tuple_param(nt, "nt")
    serialized = _serialize_dagster_namedtuple(
        nt,
        whitelist_map=_WHITELIST_MAP,
        **json_kwargs,
    )
    return serialized
def __init__(self, storage_key, path):
    """Store the checked storage key and path.

    Also builds a ``DagsterType`` whose type check always passes and whose
    name is the dot-joined ``self.path``.
    """
    self._storage_key = check.str_param(storage_key, 'storage_key')
    self._path = check.tuple_param(path, 'path', of_type=str)

    dotted_name = '.'.join(self.path)
    self._dagster_type = DagsterType(
        type_check_fn=lambda _first, _second: True,
        name=dotted_name,
    )
def test_tuple_param():
    """Exercise ``check.tuple_param`` with and without ``of_type``."""
    assert check.tuple_param((1, 2), "something")

    # Anything that is not a tuple (including None) is rejected.
    for not_a_tuple in (None, 1, [1], {1: 2}, "kdjfkd"):
        with pytest.raises(CheckError):
            check.tuple_param(not_a_tuple, "something")

    # of_type as a single type, and as a per-position tuple of types.
    accepted = [
        ((3, 4), int),
        (("foo", "bar"), str),
        ((3, 4), (int, int)),
        ((3, "bar"), (int, str)),
    ]
    for value, of_type in accepted:
        assert check.tuple_param(value, "something", of_type=of_type)

    # Length mismatches (both directions) and element type mismatches.
    rejected = [
        ((3, 4, 5), (int, int)),
        ((3, 4), (int, int, int)),
        ((3, 4), (int, str)),
        ((3, 4), (str, str)),
    ]
    for value, of_type in rejected:
        with pytest.raises(CheckError):
            check.tuple_param(value, "something", of_type=of_type)
def execute_cli(
    executable: str,
    command: Tuple[str, ...],
    flags_dict: Dict[str, Any],
    log: Any,
    warn_error: bool,
    ignore_handled_error: bool,
) -> Dict[str, Any]:
    """Executes a command on the dbt CLI in a subprocess.

    The argument vector is ``executable --log-format json [--warn-error]
    *command`` followed by one ``--flag [value]`` group per truthy entry of
    ``flags_dict``. Output (stdout with stderr merged in) is logged line by
    line, and every line that parses as JSON is collected.

    Returns:
        A dict with the keys ``command``, ``return_code``, ``logs``,
        ``raw_output`` and ``summary``.

    Raises:
        DagsterDbtCliFatalRuntimeError: the process exited with code 2.
        DagsterDbtCliHandledRuntimeError: the process exited with code 1 and
            ``ignore_handled_error`` is false.
    """
    check.str_param(executable, "executable")
    check.tuple_param(command, "command", of_type=str)
    check.dict_param(flags_dict, "flags_dict", key_type=str)
    check.bool_param(warn_error, "warn_error")
    check.bool_param(ignore_handled_error, "ignore_handled_error")

    # Format the dbt CLI flags in the command.
    argv = [executable, "--log-format", "json"]
    if warn_error:
        argv.append("--warn-error")
    argv.extend(command)

    for flag, value in flags_dict.items():
        if not value:
            continue

        argv.append(f"--{flag}")

        if isinstance(value, bool):
            # A true bool flag needs no value; its presence is enough.
            pass
        elif isinstance(value, list):
            check.list_param(value, f"config.{flag}", of_type=str)
            argv += value
        elif isinstance(value, dict):
            argv.append(json.dumps(value))
        else:
            argv.append(str(value))

    # Execute the dbt CLI command in a subprocess.
    command_str = " ".join(argv)
    log.info(f"Executing command: $ {command_str}")

    try:
        proc_out = subprocess.check_output(args=argv, stderr=subprocess.STDOUT)
        return_code = 0
    except subprocess.CalledProcessError as exc:
        return_code = exc.returncode
        proc_out = exc.output

    # Parse the JSON logs from the dbt process.
    logs = []
    for raw_line in proc_out.strip().split(b"\n"):
        line = raw_line.decode()
        log.info(line.rstrip())
        try:
            parsed_line = json.loads(line)
        except json.JSONDecodeError:
            continue
        logs.append(parsed_line)

    log.info("dbt exited with return code {return_code}".format(return_code=return_code))

    raw_output = proc_out.decode()

    if return_code == 2:
        raise DagsterDbtCliFatalRuntimeError(logs=logs, raw_output=raw_output)

    if return_code == 1 and not ignore_handled_error:
        raise DagsterDbtCliHandledRuntimeError(logs=logs, raw_output=raw_output)

    return {
        "command": command_str,
        "return_code": return_code,
        "logs": logs,
        "raw_output": raw_output,
        "summary": extract_summary(logs),
    }
def execute_cli(
    executable: str,
    command: Tuple[str, ...],
    flags_dict: Dict[str, Any],
    log: Any,
    warn_error: bool,
    ignore_handled_error: bool,
) -> Dict[str, Any]:
    """Executes a command on the dbt CLI in a subprocess, streaming its output.

    The argument vector is ``executable --log-format json [--warn-error]
    *command`` followed by one ``--flag [value]`` group per truthy entry of
    ``flags_dict``. stdout is read line by line while the process runs
    (stderr is not redirected). Lines that parse as JSON are collected and
    logged at the level named by their ``levelname`` when ``log`` has a
    matching attribute; everything else is logged at info level.

    Returns:
        A dict with the keys ``command``, ``return_code``, ``logs``,
        ``raw_output`` and ``summary``.

    Raises:
        DagsterDbtCliFatalRuntimeError: the process exited with code 2.
        DagsterDbtCliHandledRuntimeError: the process exited with code 1 and
            ``ignore_handled_error`` is false.
    """
    check.str_param(executable, "executable")
    check.tuple_param(command, "command", of_type=str)
    check.dict_param(flags_dict, "flags_dict", key_type=str)
    check.bool_param(warn_error, "warn_error")
    check.bool_param(ignore_handled_error, "ignore_handled_error")

    # Format the dbt CLI flags in the command.
    command_list = [executable, "--log-format", "json"]
    if warn_error:
        command_list.append("--warn-error")
    command_list.extend(command)

    for flag, value in flags_dict.items():
        if not value:
            continue

        command_list.append(f"--{flag}")

        if isinstance(value, bool):
            # If a bool flag (and is True), the presence of the flag itself is enough.
            continue

        if isinstance(value, list):
            check.list_param(value, f"config.{flag}", of_type=str)
            command_list += value
            continue

        if isinstance(value, dict):
            command_list.append(json.dumps(value))
            continue

        command_list.append(str(value))

    # Execute the dbt CLI command in a subprocess.
    command_str = " ".join(command_list)
    log.info(f"Executing command: {command_str}")

    logs = []
    output = []

    # The context manager closes the stdout pipe even if logging or parsing raises.
    with subprocess.Popen(command_list, stdout=subprocess.PIPE) as process:
        for raw_line in process.stdout:
            line = raw_line.decode("utf-8")
            output.append(line)
            try:
                json_line = json.loads(line)
            except json.JSONDecodeError:
                log.info(line.rstrip())
                continue

            logs.append(json_line)
            # Valid JSON is not necessarily an object (e.g. a bare number);
            # only dicts can carry a level name.
            level = json_line.get("levelname", "").lower() if isinstance(json_line, dict) else ""
            if hasattr(log, level):
                getattr(log, level)(json_line.get("message", ""))
            else:
                log.info(line.rstrip())

        process.wait()
        return_code = process.returncode

    log.info("dbt exited with return code {return_code}".format(return_code=return_code))

    # Streamed lines keep their own line terminator, so joining on "\n" would
    # insert a blank line after every line of output.
    raw_output = "".join(output)

    if return_code == 2:
        raise DagsterDbtCliFatalRuntimeError(logs=logs, raw_output=raw_output)

    if return_code == 1 and not ignore_handled_error:
        raise DagsterDbtCliHandledRuntimeError(logs=logs, raw_output=raw_output)

    return {
        "command": command_str,
        "return_code": return_code,
        "logs": logs,
        "raw_output": raw_output,
        "summary": extract_summary(logs),
    }
def test_tuple_param():
    """Exercise ``check.tuple_param`` with and without ``of_type``."""
    assert check.tuple_param((1, 2), 'something')

    # Anything that is not a tuple (including None) is rejected.
    for not_a_tuple in (None, 1, [1], {1: 2}, 'kdjfkd'):
        with pytest.raises(CheckError):
            check.tuple_param(not_a_tuple, 'something')

    # of_type as a single type, and as a per-position tuple of types.
    accepted = [
        ((3, 4), int),
        (('foo', 'bar'), str),
        ((3, 4), (int, int)),
        ((3, 'bar'), (int, str)),
    ]
    for value, of_type in accepted:
        assert check.tuple_param(value, 'something', of_type=of_type)

    # Length mismatches (both directions) and element type mismatches.
    rejected = [
        ((3, 4, 5), (int, int)),
        ((3, 4), (int, int, int)),
        ((3, 4), (int, str)),
        ((3, 4), (str, str)),
    ]
    for value, of_type in rejected:
        with pytest.raises(CheckError):
            check.tuple_param(value, 'something', of_type=of_type)
def serialize_dagster_namedtuple(nt, **json_kwargs):
    """Check that ``nt`` is a tuple and serialize it.

    Delegates to ``_serialize_dagster_namedtuple`` with the module whitelist;
    ``json_kwargs`` are passed through.
    """
    checked_nt = check.tuple_param(nt, "nt")
    return _serialize_dagster_namedtuple(
        checked_nt,
        whitelist_map=_WHITELIST_MAP,
        **json_kwargs
    )