def __init__(self, path, data_extension=""): self.path = path self.tmppath = hdfs_config.tmppath(self.path) self.datapath = self.tmppath + ("/data%s" % data_extension) super(HdfsAtomicWriteDirPipe, self).__init__(load_hadoop_cmd() + ['fs', '-put', '-', self.datapath])
def __init__(self, path=None, format=None, is_tmp=False, fs=None):
    """
    Create an HDFS target.

    :param path: HDFS path of the target. May be ``None`` only when
        ``is_tmp`` is true, in which case a temporary path is generated.
    :param format: a luigi format. ``None`` selects the default format piped
        into ``hdfs_format.Plain``. Formats exposing the deprecated
        ``hdfs_writer``/``hdfs_reader`` attributes are still accepted but
        trigger a ``DeprecationWarning``.
    :param is_tmp: whether this target is a temporary one.
    :param fs: optional filesystem client; when omitted an autoconfigured
        HDFS client is used.
    :raises ValueError: if the path (scheme/netloc stripped) contains a colon.
    """
    if path is None:
        # NOTE: assert is stripped under `python -O`; callers must pass
        # is_tmp=True when omitting the path.
        assert is_tmp
        path = tmppath()
    super(HdfsTarget, self).__init__(path)
    if format is None:
        format = luigi.format.get_default_format() >> hdfs_format.Plain

    # Old-style formats provide hdfs_writer/hdfs_reader and do NOT have an
    # 'output' attribute; new-style formats declare output == 'hdfs'.
    old_format = (
        (
            hasattr(format, 'hdfs_writer') or
            hasattr(format, 'hdfs_reader')
        ) and
        not hasattr(format, 'output')
    )
    if not old_format and getattr(format, 'output', '') != 'hdfs':
        # New-style format that doesn't target hdfs yet: chain Plain so the
        # pipeline ends in an hdfs-capable stage.
        format = format >> hdfs_format.Plain

    if old_format:
        warnings.warn(
            'hdfs_writer and hdfs_reader method for format is deprecated,'
            'specify the property output of your format as \'hdfs\' instead',
            DeprecationWarning,
            stacklevel=2
        )
        # Use the deprecated writer/reader when present; otherwise fall back
        # to the pipe_writer/pipe_reader of the format chained with Plain.
        if hasattr(format, 'hdfs_writer'):
            format_writer = format.hdfs_writer
        else:
            w_format = format >> hdfs_format.Plain
            format_writer = w_format.pipe_writer
        if hasattr(format, 'hdfs_reader'):
            format_reader = format.hdfs_reader
        else:
            r_format = format >> hdfs_format.Plain
            format_reader = r_format.pipe_reader
        format = hdfs_format.CompatibleHdfsFormat(
            format_writer,
            format_reader,
        )
    else:
        format = hdfs_format.CompatibleHdfsFormat(
            format.pipe_writer,
            format.pipe_reader,
            getattr(format, 'input', None),
        )

    self.format = format
    self.is_tmp = is_tmp
    # Strip scheme/netloc before the colon check so URIs like
    # hdfs://host:port/... are validated on the path component only.
    (scheme, netloc, path, query, fragment) = urlparse.urlsplit(path)
    if ":" in path:
        raise ValueError('colon is not allowed in hdfs filenames')
    self._fs = fs or hdfs_clients.get_autoconfig_client()
def __init__(self, path):
    """
    Open an atomic write pipe to a single HDFS file.

    Data is streamed from stdin into a temporary path derived from *path*
    via ``hadoop fs -put -``; the parent directory of the temp path is
    created first if it does not exist.

    :param path: final HDFS file path.
    """
    self.path = path
    # Stage the write in a temp file so the final path appears atomically.
    self.tmppath = hdfs_config.tmppath(self.path)
    # hadoop fs -put fails if the destination directory is missing, so
    # create it up front (no error when it already exists).
    parent_dir = os.path.dirname(self.tmppath)
    mkdir(parent_dir, parents=True, raise_if_exists=False)
    # '-' makes hadoop read from stdin, i.e. from this pipe.
    put_command = load_hadoop_cmd() + ['fs', '-put', '-', self.tmppath]
    super(HdfsAtomicWritePipe, self).__init__(put_command)