def _to_target(value, from_string_kwargs=None):
    if value is None:
        return value

    from dbnd._core.task import Task

    if isinstance(value, Target):
        return value

    if isinstance(value, Task):
        if value.task_definition.single_result_output:
            return value.result
        return value.task_outputs

    airflow_op_task = _try_get_task_from_airflow_op(value)
    if airflow_op_task:
        return airflow_op_task.task_outputs

    if isinstance(value, six.string_types):
        from_string_kwargs = from_string_kwargs or {}
        return target(value, **from_string_kwargs)

    if isinstance(value, Path):
        from_string_kwargs = from_string_kwargs or {}
        return target(str(value), **from_string_kwargs)

    raise friendly_error.failed_to_convert_value_to_target(value)
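
# A minimal usage sketch (illustrative, not library source). It assumes the
# names used by the function above (target, Target, Path) are in scope;
# `my_task` stands in for any dbnd Task instance and is hypothetical.
def _to_target_examples(my_task):
    assert _to_target(None) is None              # None passes through
    t = _to_target("s3://bucket/data.csv")       # str -> Target via target()
    assert isinstance(t, Target)
    p = _to_target(Path("/tmp/out.parquet"))     # Path -> target(str(path))
    assert isinstance(p, Target)
    outputs = _to_target(my_task)                # Task -> result or task_outputs
    return t, p, outputs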
def calc_init_value(self, value):
    if value is None:
        # it's None
        # TODO: maybe we can still "normalize" the value
        return value

    if isinstance(value, Path):
        return target(str(value), config=self.target_config)

    if isinstance(value, Target):
        # it's a deferred result -> we load it later
        return value

    # we process the value regardless of parse!
    # cf_value.require_parse:
    if self.env_interpolation and isinstance(value, six.string_types):
        try:
            value = expand_env_var(value)
        except Exception as ex:
            logger.warning(
                "failed to expand variable '%s': %s", safe_string(value), str(ex)
            )

    # in case we are an output and have a value:
    # it's a Target, or it's a str to be converted to a target
    load_value = self.load_on_build and not self.is_output()
    return self.value_type.parse_value(
        value, load_value=load_value, target_config=self.target_config
    )
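
# Normalization flow at a glance (a sketch; `param` stands for the parameter
# definition object this method belongs to, whose class is not shown here,
# and the paths are hypothetical):
#
#   param.calc_init_value(None)                  -> None, unchanged
#   param.calc_init_value(Path("/data/in.csv"))  -> target("/data/in.csv", config=param.target_config)
#   param.calc_init_value(existing_target)       -> existing_target (loaded later, on use)
#   param.calc_init_value("$MY_VAR/in.csv")      -> env vars expanded, then value_type.parse_value(...)
#
# load_value is True only when load_on_build is set and the parameter is not
# an output, so outputs are never eagerly loaded.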
def list_partitions(self):
    all_files = self.fs.listdir(self.path)
    all_files = sorted(all_files)
    all_files = filter(lambda x: not self.is_meta_file(x), all_files)
    if self.flag_target:
        all_files = filter(lambda x: x != str(self.flag_target), all_files)
    return [
        target(
            p,
            fs=self._fs,
            config=self.config.as_file(),
            properties=self.properties,
            source=self.source,
        )
        for p in all_files
    ]
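
# Usage sketch (assumes target() yields a DirTarget for a trailing-slash path,
# consistent with the constructor below; the path is hypothetical):
run_dir = target("s3://bucket/run-42/")
for part in run_dir.list_partitions():
    # meta files and the flag file (e.g. _SUCCESS) are already filtered out
    print(part.path)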
def partition(self, name=NOTHING, extension=NOTHING, config=NOTHING, **kwargs):
    """
    :param name: file name of the partition; if not provided, "part-%04d" % ID
    :param extension: file extension; if not provided, the default extension
        of the (file) config will be used
    :param config: target config for the partition; if not provided, this
        directory's config converted to a file config
    :return: FileTarget that represents the partition.
    """
    if is_not_defined(name):
        name = "part-%04d" % self._auto_partition_count
        self._auto_partition_count += 1
    if is_not_defined(config):
        # only if it's a call not from file/folder do we set it as file
        config = self.config.as_file()
    if is_not_defined(extension):
        extension = config.get_ext()
    if extension:
        name += extension
    return target(self.path, name, config=config, fs=self._fs, **kwargs)
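
# Naming sketch, assuming `run_dir` is the DirTarget from the example above:
p0 = run_dir.partition()                  # "part-0000" + default extension
p1 = run_dir.partition()                  # "part-0001" + default extension
ev = run_dir.partition(name="events", extension=".json")  # "events.json"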
def __init__(self, path, fs, config=None, io_pipe=None, source=None):
    """
    If `config.flag` is set, this defines a target directory with a flag file
    (defaults to `_SUCCESS`) used to signify job success.

    This checks for two things:

    * the path exists (just like the S3Target)
    * the _SUCCESS file exists within the directory.

    Because Hadoop outputs into a directory and not a single file,
    the path is assumed to be a directory.

    :param path:
    :param fs:
    :param config: carries `flag` and `meta_files` in addition to the base config
    :param io_pipe:
    :param source:
    """
    super(DirTarget, self).__init__(
        path=path, fs=fs, config=config, io_pipe=io_pipe, source=source
    )
    if path[-1] != "/" and path[-1] != "\\":
        raise ValueError(
            "%s requires the path to be a directory. "
            "It must end with a slash ( / )." % self.__class__.__name__
        )

    flag = config.flag
    if flag is True:
        # True means "use the default flag file name"; a string overrides it
        flag = DEFAULT_FLAG_FILE_NAME
    self.flag_target = target(path, flag, fs=fs) if flag else None

    self._auto_partition_count = 0
    self._write_target = self.partition("part-0000")
    self.meta_files = config.meta_files
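
# Construction sketch (hypothetical local path; assumes target() returns a
# DirTarget for a trailing-slash path and that config.flag is True):
out = target("/tmp/output/")
out.flag_target      # target for the _SUCCESS marker inside the directory
out.partition()      # "part-0000" + default extension (auto counter starts at 0)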