def __init__(self, cachedir, basepath, read=True, write=True,
             read_through_write=True, delete=False, on_duplicate_key='skip',
             cleanup_cachedir=False, always_check_remote=False):
    """
    Parameters
    ----------
    always_check_remote : bool
        When True the remote store will be checked with every __contains__
        call. Otherwise the check short-circuits if the blob is found in the
        cachedir. For performance reasons this should usually be left False.
        The only reason to enable it is if you are combining a RemoteStore
        and a DiskStore in a ChainedStore. Since the RemoteStore already
        doubles as a DiskStore through its cachedir, chaining the two rarely
        makes sense.
    """
    super(RemoteStore, self).__init__(read=read, write=write,
                                      read_through_write=read_through_write,
                                      delete=delete,
                                      on_duplicate_key=on_duplicate_key)
    self.always_check = always_check_remote
    self.cachedir = _abspath(cachedir)
    self.basepath = basepath
    self.cleanup_cachedir = cleanup_cachedir
    mkdirp(self.cachedir)
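A minimal sketch of the __contains__ short-circuit the docstring describes, not the library's actual implementation; `_exists_remote` is a hypothetical stand-in for the store's real remote-lookup helper:

import os

def contains_blob(store, blob_id):
    # Hit the local cachedir first and short-circuit, unless always_check
    # was requested, in which case always consult the remote store.
    local_path = os.path.join(store.cachedir, blob_id)
    if os.path.exists(local_path) and not store.always_check:
        return True
    return store._exists_remote(blob_id)  # hypothetical remote lookup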
import os

from joblib.disk import mkdirp, rm_subdirs  # assumed import path (see the SFTP example below)


def test_rm_subdirs(tmpdir):
    sub_path = os.path.join(tmpdir.strpath, "am", "stram")
    full_path = os.path.join(sub_path, "gram")
    mkdirp(full_path)
    rm_subdirs(sub_path)
    assert os.path.exists(sub_path)
    assert not os.path.exists(full_path)
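The test above pins down the contract: rm_subdirs empties a directory's subdirectories but leaves the directory itself in place. A rough sketch of that behavior (not joblib's actual code) could look like:

import os
import shutil

def rm_subdirs_sketch(path):
    # Remove each subdirectory of `path` while keeping `path` itself,
    # which is exactly what the test above asserts.
    for name in os.listdir(path):
        full = os.path.join(path, name)
        if os.path.isdir(full):
            shutil.rmtree(full, ignore_errors=True)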
def __init__(self, cachedir, read=True, write=True, read_through_write=True,
             delete=False, on_duplicate_key='skip'):
    super(DiskStore, self).__init__(read=read, write=write,
                                    read_through_write=read_through_write,
                                    delete=delete,
                                    on_duplicate_key=on_duplicate_key)
    self.cachedir = _abspath(cachedir)
    mkdirp(self.cachedir)
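Assuming this is the provenance library's DiskStore (as the SFTP example at the end suggests), constructing one only requires pointing it at a cache directory; the path below is illustrative:

# Illustrative only; the keyword arguments mirror the signature above.
store = DiskStore(cachedir='/tmp/provenance-blobs',  # created via mkdirp if missing
                  read=True, write=True, delete=False)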
import os

from pytest import raises
from joblib.disk import mkdirp  # assumed import path (see the SFTP example below)


def test_mkdirp(tmpdir):
    mkdirp(os.path.join(tmpdir.strpath, 'ham'))
    mkdirp(os.path.join(tmpdir.strpath, 'ham'))
    mkdirp(os.path.join(tmpdir.strpath, 'spam', 'spam'))

    # Not all OSErrors are ignored
    with raises(OSError):
        mkdirp('')
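The test encodes mkdirp's contract: creating an existing directory (or nested directories) is a no-op, but other failures such as an empty path still raise OSError. A minimal sketch of a function with that contract, not necessarily joblib's implementation:

import errno
import os

def mkdirp_sketch(d):
    """Create `d` and any missing parents; ignore only 'already exists' errors."""
    try:
        os.makedirs(d)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise  # e.g. mkdirp('') still fails, as the test expects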
def mock_mkdir(self, directory):  # pylint: disable=unused-argument
    """Mock mkdir."""
    if directory.startswith("s3://"):
        # Skip bucket creation on purpose
        return
    return mkdirp(directory)
def __init__(self, cachedir, basepath, s3_config=None, s3fs=None, read=True,
             write=True, read_through_write=True, delete=False,
             on_duplicate_key='skip', cleanup_cachedir=False,
             always_check_s3=False):
    """
    Parameters
    ----------
    always_check_s3 : bool
        When True S3 will be checked with every __contains__ call. Otherwise
        the check short-circuits if the blob is found in the cachedir. For
        performance reasons this should usually be left False. The only
        reason to enable it is if you are combining an S3Store and a
        DiskStore in a ChainedStore. Since the S3Store already doubles as a
        DiskStore through its cachedir, chaining the two rarely makes sense.
    """
    super(S3Store, self).__init__(read=read, write=write,
                                  read_through_write=read_through_write,
                                  delete=delete,
                                  on_duplicate_key=on_duplicate_key)
    self.always_check_s3 = always_check_s3
    if s3fs:
        self.s3fs = s3fs
    elif s3_config is not None:
        self.s3fs = S3FileSystem(**s3_config)
    else:
        raise ValueError(
            "You must provide either s3_config or s3fs for an S3Store")
    self.cachedir = _abspath(cachedir)
    self.basepath = basepath
    self.cleanup_cachedir = cleanup_cachedir
    mkdirp(self.cachedir)
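A hedged construction example: `s3_config` is forwarded verbatim to `s3fs.S3FileSystem`, so any of that library's keyword arguments can be used, or a pre-built filesystem can be passed via `s3fs=`. The bucket, prefix, and paths below are placeholders:

# Illustrative values only; s3_config is simply **-expanded into S3FileSystem.
store = S3Store(cachedir='/tmp/s3-blob-cache',
                basepath='s3://my-bucket/artifacts',  # hypothetical bucket/prefix
                s3_config={'anon': False},            # any s3fs.S3FileSystem kwargs
                always_check_s3=False)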
def test_mkdirp(tmpdir):
    mkdirp(os.path.join(tmpdir.strpath, 'ham'))
    mkdirp(os.path.join(tmpdir.strpath, 'ham'))
    mkdirp(os.path.join(tmpdir.strpath, 'spam', 'spam'))

    # Not all OSErrors are ignored
    assert_raises(OSError, mkdirp, '')
def __init__(self, cachedir, basepath, ssh_config=None, ssh_client=None,
             sftp_client=None, read=True, write=True, read_through_write=True,
             delete=False, on_duplicate_key='skip', cleanup_cachedir=False):
    super(SFTPStore, self).__init__(read=read, write=write,
                                    read_through_write=read_through_write,
                                    delete=delete,
                                    on_duplicate_key=on_duplicate_key)
    self.ssh_client = None
    if ssh_config is not None:
        self.ssh_client = _ssh_client(ssh_config)

    if self.ssh_client is not None:
        sftp_client = paramiko.SFTPClient.from_transport(
            self.ssh_client._transport)

    if sftp_client is not None:
        self.sftp_client = sftp_client
    else:
        # This is to allow testing the importing/subpackage aspect without
        # having to actually test the class by mocking an ssh connection.
        if cachedir is None and basepath is None:
            return
        raise ValueError(
            'You must specify an SFTP client by passing in one of: '
            'sftp_client, ssh_config, ssh_client')

    self.cachedir = bs._abspath(cachedir)
    self.basepath = basepath
    self.cleanup_cachedir = cleanup_cachedir
    mkdirp(self.cachedir)
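A hedged construction example; the ssh_config keys mirror the SFTP configuration in the example script below, and every value here is a placeholder:

# Illustrative only; ssh_config is handed to the store's internal _ssh_client helper.
store = SFTPStore(cachedir='/Users/me/provenance/sftp-cache',  # local blob cache
                  basepath='/home/me/artifacts',               # must already exist remotely
                  ssh_config={'hostname': 'example.com',
                              'port': 22,
                              'username': 'me',
                              'password': 'secret'})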
def test_mkdirp():
    tmp = mkdtemp()
    try:
        mkdirp(os.path.join(tmp, "ham"))
        mkdirp(os.path.join(tmp, "ham"))
        mkdirp(os.path.join(tmp, "spam", "spam"))

        # Not all OSErrors are ignored
        nose.tools.assert_raises(OSError, mkdirp, "")
    finally:
        shutil.rmtree(tmp)
#!/usr/bin/env python
from joblib.disk import mkdirp

import provenance as p

mkdirp('./remote-machine/sftp-artifacts')

p.load_config({
    'blobstores': {
        'sftp': {
            'type': 'sftp',
            'cachedir': '<path on your local machine where you want the blobs to be cached, e.g. /Users/me/provenance/examples/sftp/artifacts>',
            'basepath': '<path on the remote machine, e.g. /home/me/artifacts; make sure that directory exists>',
            'read': True,
            'write': True,
            'read_through_write': False,
            'delete': True,
            'ssh_config': {
                'hostname': '<your host here>',
                'port': '<your port here as an int, defaults to 22 if excluded>',
                'username': '******',
                'password': '******',
            },
        }
    },
    'artifact_repos': {
        'local': {