def __init__(self, path, *args, **kwargs):
    """Create the proxied target, redirecting to local disk when configured.

    The ``local_s3_path`` keyword argument is removed from ``kwargs``; when it
    is absent, the ``LOCAL_S3_PATH`` environment variable is used instead.
    If the resulting value is truthy, ``path`` has ``'s3://'`` stripped and is
    joined onto that directory, and a ``LocalTarget`` is built. Otherwise an
    ``S3Target`` is built for the untouched ``path``. Remaining positional
    and keyword arguments are forwarded to whichever target is created.
    """
    env_default = os.getenv('LOCAL_S3_PATH', None)
    self.local_s3_path = kwargs.pop('local_s3_path', env_default)

    if self.local_s3_path:
        # Mirror the S3 layout underneath the local root directory.
        relative_part = path.replace('s3://', '')
        path = os.path.join(self.local_s3_path, relative_part)
        self._proxy = LocalTarget(path, *args, **kwargs)
        return

    self._proxy = S3Target(path, *args, **kwargs)
def get_luigi_target(path):
    """Return the luigi target that corresponds to ``path``.

    The target class is chosen from the path prefix:

    * ``s3n://`` -> ``S3Target``. When ``file_is_compressible(path)`` is true
      the gzip format is forced and a ``Content-Encoding: gzip`` header is
      passed along.
    * ``ssh://`` -> ``RemoteTarget``, built from the parsed URL (path, host,
      username, password) plus the ``ssh-key-file`` and ``no-host-key-check``
      values read from the ``ssh`` section of ``config``.
    * anything else -> ``LocalTarget``.

    A path ending in ``.gz`` selects ``GzipFormat(compression_level=9)``;
    otherwise the format is ``None`` unless the S3 branch forces gzip.

    :param path: location string, optionally prefixed with ``s3n://`` or
        ``ssh://``.
    :return: an ``S3Target``, ``RemoteTarget`` or ``LocalTarget`` instance.
    """
    try:
        from urlparse import urlparse
    # Python3 compatibility
    except ImportError:
        from urllib.parse import urlparse

    from luigi.s3 import S3Target
    from luigi.contrib.ssh import RemoteTarget
    from luigi.file import LocalTarget
    from luigi.format import GzipFormat

    file_format = None
    if path.endswith(".gz"):
        file_format = GzipFormat(compression_level=9)

    if path.startswith("s3n://"):
        # Try to always store files in S3 in compressed format.
        s3_headers = {}
        if file_is_compressible(path):
            file_format = GzipFormat(compression_level=9)
            s3_headers["Content-Encoding"] = "gzip"
        return S3Target(path, format=file_format, headers=s3_headers)

    if path.startswith("ssh://"):
        ssh_key_file = config.get("ssh", "ssh-key-file", None)
        no_host_key_check = config.get("ssh", "no-host-key-check", None)
        p = urlparse(path)
        # Pass the computed file_format; the bare name `format` here would be
        # the Python builtin function, not a luigi format object.
        return RemoteTarget(p.path, p.hostname, format=file_format,
                            username=p.username, sshpass=p.password,
                            key_file=ssh_key_file,
                            no_host_key_check=no_host_key_check)

    return LocalTarget(path, format=file_format)
def get_luigi_target(path):
    """Return the luigi target that corresponds to ``path``.

    The target class is chosen from the path prefix:

    * ``s3n://`` -> ``S3Target``.
    * ``ssh://`` -> ``RemoteTarget``, built from the parsed URL (path, host,
      username, password) plus the ``ssh-key-file`` and ``no-host-key-check``
      values read from the ``ssh`` section of ``config``.
    * anything else -> ``LocalTarget``.

    A path ending in ``.gz`` selects ``GzipFormat()``; otherwise the format
    passed to the target is ``None``.

    :param path: location string, optionally prefixed with ``s3n://`` or
        ``ssh://``.
    :return: an ``S3Target``, ``RemoteTarget`` or ``LocalTarget`` instance.
    """
    try:
        from urlparse import urlparse
    # Python3 compatibility
    except ImportError:
        from urllib.parse import urlparse

    from luigi.s3 import S3Target
    from luigi.contrib.ssh import RemoteTarget
    from luigi.file import LocalTarget
    from luigi.format import GzipFormat

    file_format = GzipFormat() if path.endswith(".gz") else None

    if path.startswith("s3n://"):
        return S3Target(path, format=file_format)

    if path.startswith("ssh://"):
        ssh_key_file = config.get("ssh", "ssh-key-file", None)
        no_host_key_check = config.get("ssh", "no-host-key-check", None)
        p = urlparse(path)
        # Pass the computed file_format; the bare name `format` here would be
        # the Python builtin function, not a luigi format object.
        return RemoteTarget(p.path, p.hostname, format=file_format,
                            username=p.username, sshpass=p.password,
                            key_file=ssh_key_file,
                            no_host_key_check=no_host_key_check)

    return LocalTarget(path, format=file_format)
class FireflowerS3Target(FileSystemTarget):
    """
    S3 target that can be transparently redirected to the local filesystem.

    By default every operation is delegated to an ``S3Target``. If either the
    ``local_s3_path`` keyword argument or the ``LOCAL_S3_PATH`` environment
    variable provides a directory, the ``s3://`` marker is removed from the
    requested path and the remainder is resolved beneath that directory using
    a ``LocalTarget`` instead.

    Any additional positional or keyword arguments are forwarded unchanged to
    the underlying target.
    """

    def __init__(self, path, *args, **kwargs):
        env_default = os.getenv('LOCAL_S3_PATH', None)
        self.local_s3_path = kwargs.pop('local_s3_path', env_default)

        if self.local_s3_path:
            # Mirror the S3 layout underneath the local root directory.
            relative_part = path.replace('s3://', '')
            path = os.path.join(self.local_s3_path, relative_part)
            self._proxy = LocalTarget(path, *args, **kwargs)
        else:
            self._proxy = S3Target(path, *args, **kwargs)

    @property
    def path(self):
        """Path reported by the underlying target."""
        proxied = self._proxy
        return proxied.path

    @property
    def fs(self):
        """Filesystem object of the underlying target."""
        proxied = self._proxy
        return proxied.fs

    def open(self, mode='r'):
        """Open the underlying target in ``mode`` and return its result."""
        proxied = self._proxy
        return proxied.open(mode)
def output(self):
    """Return a ``LocalTarget`` for the path stored in ``self.output_path``."""
    destination = self.output_path
    return LocalTarget(path=destination)
def output(self):
    """Return a ``LocalTarget`` whose file name embeds ``self.timestamp``.

    The timestamp is rendered as ``YYYYMMDD`` and substituted into
    ``awesome_is_here_<stamp>.txt``.
    """
    day_stamp = self.timestamp.strftime('%Y%m%d')
    file_name = 'awesome_is_here_%s.txt' % day_stamp
    return LocalTarget(file_name)
def get_target(cls, scheme, path, fragment, username, password, hostname, port, query, **kwargs):
    """Build a ``LocalTarget`` from the pieces of a parsed location.

    The target path is ``hostname`` (or an empty string when it is falsy)
    immediately followed by ``path``. ``query`` is updated in place with
    ``kwargs`` and then expanded as keyword arguments to ``LocalTarget``.
    The remaining parameters are accepted but not used here.
    """
    host_prefix = hostname if hostname else ''
    full_path = host_prefix + path
    # Explicit keyword arguments override same-named entries from the query.
    query.update(kwargs)
    return LocalTarget(full_path, **query)
def output(self):
    """Return a ``LocalTarget`` for the path held in ``self.cached_result``."""
    result_path = self.cached_result
    return LocalTarget(result_path)
def output(self):
    """Return a ``LocalTarget`` pointing at the fixed ``.dep`` resource file."""
    dep_file = 'test/resources/targets/TestMin15Generator.dep'
    return LocalTarget(path=dep_file)