def test_decode_source_spec():
    """Exercise decode_source_spec() across datalad-RI, plain-URL and RIA specs."""
    # resolves datalad RIs:
    eq_(decode_source_spec('///subds'),
        dict(source='///subds',
             giturl=consts.DATASETS_TOPURL + 'subds',
             version=None,
             type='dataladri',
             default_destpath='subds'))
    # custom datalad RIs are not supported
    assert_raises(NotImplementedError, decode_source_spec, '//custom/subds')

    # doesn't harm others: plain URLs/paths pass through as 'giturl'
    for url in (
            'http://example.com',
            '/absolute/path',
            'file://localhost/some',
            'localhost/another/path',
            '[email protected]/mydir',
            'ssh://somewhe.re/else',
            'git://github.com/datalad/testrepo--basic--r1',
    ):
        props = decode_source_spec(url)
        # default_destpath is input-dependent; only check the stable keys
        props.pop('default_destpath')
        eq_(props, dict(source=url, version=None, giturl=url, type='giturl'))

    # RIA URIs with and without version specification
    dsid = '6d69ca68-7e85-11e6-904c-002590f97d84'
    for proto, loc, version in (
            ('http', 'example.com', None),
            ('http', 'example.com', 'v1.0'),
            ('http', 'example.com', 'some_with@in_it'),
            ('ssh', 'example.com', 'some_with@in_it'),
    ):
        spec = 'ria+{}://{}{}{}'.format(
            proto,
            loc,
            '#{}'.format(dsid),
            '@{}'.format(version) if version else '')
        eq_(decode_source_spec(spec),
            dict(
                source=spec,
                # RIA stores split the dataset id into a 3-char dir plus rest
                giturl='{}://{}/{}/{}'.format(
                    proto, loc, dsid[:3], dsid[3:]),
                version=version,
                default_destpath=dsid,
                type='ria')
            )
    # not a dataset UUID
    assert_raises(ValueError, decode_source_spec, 'ria+http://example.com#123')
def _create_sibling_ria(
        ds,
        url,
        name,
        storage_sibling,
        storage_name,
        existing,
        shared,
        group,
        post_update_hook,
        trust_level,
        res_kwargs):
    """Create a RIA store sibling (git remote and/or ORA storage remote) for `ds`.

    Generator yielding datalad status dicts. `existing` controls behavior on
    pre-existing siblings/target repos ('skip', 'error', 'reconfigure').
    """
    # be safe across datasets
    res_kwargs = res_kwargs.copy()
    # update dataset
    res_kwargs['ds'] = ds

    if not isinstance(ds.repo, AnnexRepo):
        # No point in dealing with a special remote when there's no annex.
        # Note, that in recursive invocations this might only apply to some of
        # the datasets. Therefore dealing with it here rather than one level up.
        lgr.debug("No annex at %s. Ignoring special remote options.", ds.path)
        storage_sibling = False
        storage_name = None

    # parse target URL
    try:
        ssh_host, base_path, rewritten_url = verify_ria_url(url, ds.config)
    except ValueError as e:
        yield get_status_dict(
            status='error',
            message=str(e),
            **res_kwargs
        )
        return

    base_path = Path(base_path)

    git_url = decode_source_spec(
        # append dataset id to url and use magic from clone-helper:
        url + '#{}'.format(ds.id),
        cfg=ds.config
    )['giturl']
    # determine layout locations; go for a v1 layout
    repo_path, _, _ = get_layout_locations(1, base_path, ds.id)

    ds_siblings = [r['name'] for r in ds.siblings(result_renderer=None)]
    # Figure whether we are supposed to skip this very dataset
    if existing == 'skip' and (
            name in ds_siblings or (
                storage_name and storage_name in ds_siblings)):
        yield get_status_dict(
            status='notneeded',
            message="Skipped on existing sibling",
            **res_kwargs
        )
        # if we skip here, nothing else can change that decision further
        # down
        return

    # figure whether we need to skip or error due an existing target repo before
    # we try to init a special remote.
    if ssh_host:
        from datalad import ssh_manager
        ssh = ssh_manager.get_connection(
            ssh_host,
            use_remote_annex_bundle=False)
        ssh.open()

    if existing in ['skip', 'error']:
        config_path = repo_path / 'config'
        # No .git -- if it's an existing repo in a RIA store it should be a
        # bare repo.
        # Theoretically we could have additional checks for whether we have
        # an empty repo dir or a non-bare repo or whatever else.
        if ssh_host:
            try:
                ssh('[ -e {p} ]'.format(p=quote_cmdlinearg(str(config_path))))
                exists = True
            except CommandError:
                exists = False
        else:
            exists = config_path.exists()

        if exists:
            if existing == 'skip':
                # 1. not rendered by default
                # 2. message doesn't show up in ultimate result
                #    record as shown by -f json_pp
                yield get_status_dict(
                    status='notneeded',
                    message="Skipped on existing remote "
                            "directory {}".format(repo_path),
                    **res_kwargs
                )
                return
            else:  # existing == 'error'
                yield get_status_dict(
                    status='error',
                    message="remote directory {} already "
                            "exists.".format(repo_path),
                    **res_kwargs
                )
                return

    if storage_sibling == 'only':
        lgr.info("create storage sibling '{}' ...".format(name))
    else:
        lgr.info("create sibling{} '{}'{} ...".format(
            's' if storage_name else '',
            name,
            " and '{}'".format(storage_name) if storage_name else '',
        ))
    create_ds_in_store(SSHRemoteIO(ssh_host) if ssh_host else LocalIO(),
                       base_path, ds.id, '2', '1')
    if storage_sibling:
        # we are using the main `name`, if the only thing we are creating
        # is the storage sibling
        srname = name if storage_sibling == 'only' else storage_name

        lgr.debug('init special remote {}'.format(srname))
        special_remote_options = [
            'type=external',
            'externaltype=ora',
            'encryption=none',
            'autoenable=true',
            'url={}'.format(url)]
        try:
            ds.repo.init_remote(
                srname,
                options=special_remote_options)
        except CommandError as e:
            if existing == 'reconfigure' \
                    and 'git-annex: There is already a special remote' \
                    in e.stderr:
                # run enableremote instead
                lgr.debug(
                    "special remote '%s' already exists. "
                    "Run enableremote instead.",
                    srname)
                # TODO: Use AnnexRepo.enable_remote (which needs to get
                #       `options` first)
                ds.repo.call_annex([
                    'enableremote',
                    srname] + special_remote_options)
            else:
                yield get_status_dict(
                    status='error',
                    message="initremote failed.\nstdout: %s\nstderr: %s"
                    % (e.stdout, e.stderr),
                    **res_kwargs
                )
                return

        if trust_level:
            ds.repo.call_annex([trust_level, srname])
        # get uuid for use in bare repo's config
        uuid = ds.config.get("remote.{}.annex-uuid".format(srname))

    if storage_sibling == 'only':
        # we can stop here, the rest of the function is about setting up
        # the git remote part of the sibling
        yield get_status_dict(
            status='ok',
            **res_kwargs,
        )
        return

    # 2. create a bare repository in-store:

    lgr.debug("init bare repository")
    # TODO: we should prob. check whether it's there already. How?
    # Note: like the special remote itself, we assume local FS if no
    # SSH host is specified
    disabled_hook = repo_path / 'hooks' / 'post-update.sample'
    enabled_hook = repo_path / 'hooks' / 'post-update'
    if group:
        chgrp_cmd = "chgrp -R {} {}".format(
            quote_cmdlinearg(str(group)),
            quote_cmdlinearg(str(repo_path)))
    if ssh_host:
        ssh('cd {rootdir} && git init --bare{shared}'.format(
            rootdir=quote_cmdlinearg(str(repo_path)),
            shared=" --shared='{}'".format(
                quote_cmdlinearg(shared)) if shared else ''
        ))
        if storage_sibling:
            # write special remote's uuid into git-config, so clone can
            # which one it is supposed to be and enable it even with
            # fallback URL
            ssh("cd {rootdir} && git config datalad.ora-remote.uuid {uuid}"
                "".format(rootdir=quote_cmdlinearg(str(repo_path)),
                          uuid=uuid))
        if post_update_hook:
            ssh('mv {} {}'.format(quote_cmdlinearg(str(disabled_hook)),
                                  quote_cmdlinearg(str(enabled_hook))))

        if group:
            # Either repository existed before or a new directory was
            # created for it, set its group to a desired one if was
            # provided with the same chgrp
            ssh(chgrp_cmd)
    else:
        gr = GitRepo(repo_path, create=True, bare=True,
                     shared=shared if shared else None)
        if storage_sibling:
            # write special remote's uuid into git-config, so clone can
            # which one it is supposed to be and enable it even with
            # fallback URL
            gr.config.add("datalad.ora-remote.uuid", uuid, where='local')
        if post_update_hook:
            disabled_hook.rename(enabled_hook)
        if group:
            # BUG FIX: the previous code passed the whole shell string
            # `chgrp_cmd` to subprocess.run() without shell=True (which
            # treats it as a single program name and fails) and shell-quoted
            # `cwd`, which must be a plain path. Use an argument list and
            # the raw dataset path instead.
            subprocess.run(['chgrp', '-R', str(group), str(repo_path)],
                           cwd=ds.path)

    # add a git remote to the bare repository
    # Note: needs annex-ignore! Otherwise we might push into dirhash
    # lower annex/object tree instead of mixed, since it's a bare
    # repo. This in turn would be an issue, if we want to pack the
    # entire thing into an archive. Special remote will then not be
    # able to access content in the "wrong" place within the archive
    lgr.debug("set up git remote")
    if name in ds_siblings:
        # otherwise we should have skipped or failed before
        assert existing == 'reconfigure'
    ds.config.set(
        "remote.{}.annex-ignore".format(name),
        value="true",
        where="local")
    ds.siblings(
        'configure',
        name=name,
        url=git_url if ssh_host else str(repo_path),
        recursive=False,
        # Note, that this should be None if storage_sibling was not set
        publish_depends=storage_name,
        result_renderer=None,
        # Note, that otherwise a subsequent publish will report
        # "notneeded".
        fetch=True
    )
    yield get_status_dict(
        status='ok',
        **res_kwargs,
    )
def _create_sibling_ria(ds, url, name, ria_remote, ria_remote_name, existing,
                        shared, group, post_update_hook, res_kwargs):
    """Create a RIA sibling (git remote and optional 'ria' special remote) for `ds`.

    Generator yielding datalad status dicts. `existing` controls behavior on
    pre-existing siblings/target directories ('skip', 'error', 'reconfigure',
    'replace').
    """
    # be safe across datasets
    res_kwargs = res_kwargs.copy()

    # parse target URL
    try:
        ssh_host, base_path = verify_ria_url(url, ds.config)
    except ValueError as e:
        yield get_status_dict(status='error', message=str(e), **res_kwargs)
        return

    base_path = Path(base_path)

    git_url = decode_source_spec(
        # append dataset id to url and use magic from clone-helper:
        url + '#{}'.format(ds.id),
        cfg=ds.config)['giturl']
    # go for a v1 layout
    repo_path, _, _ = get_layout_locations(1, base_path, ds.id)

    ds_siblings = [r['name'] for r in ds.siblings(result_renderer=None)]
    # Figure whether we are supposed to skip this very dataset
    if existing == 'skip' and (
            name in ds_siblings or
            (ria_remote_name and ria_remote_name in ds_siblings)):
        yield get_status_dict(status='notneeded',
                              message="Skipped on existing sibling",
                              **res_kwargs)
        # if we skip here, nothing else can change that decision further
        # down
        return

    # we might learn that some processing (remote repo creation is
    # not desired)
    skip = False

    lgr.info("create sibling{} '{}'{} ...".format(
        's' if ria_remote_name else '',
        name,
        " and '{}'".format(ria_remote_name) if ria_remote_name else '',
    ))
    if ssh_host:
        from datalad import ssh_manager
        ssh = ssh_manager.get_connection(ssh_host,
                                         use_remote_annex_bundle=False)
        ssh.open()

    # determine layout locations
    if ria_remote:
        lgr.debug('init special remote {}'.format(ria_remote_name))
        ria_remote_options = [
            'type=external',
            'externaltype=ria',
            'encryption=none',
            'autoenable=true',
            'url={}'.format(url)
        ]
        try:
            ds.repo.init_remote(ria_remote_name, options=ria_remote_options)
        except CommandError as e:
            if existing in ['replace', 'reconfigure'] \
                    and 'git-annex: There is already a special remote' \
                    in e.stderr:
                # run enableremote instead
                lgr.debug(
                    "special remote '%s' already exists. "
                    "Run enableremote instead.",
                    ria_remote_name)
                # TODO: Use AnnexRepo.enable_remote (which needs to get
                #       `options` first)
                cmd = ['git', 'annex', 'enableremote', ria_remote_name
                       ] + ria_remote_options
                # BUG FIX: `cwd` must be a plain path -- shell-quoting it
                # (as before) produces a non-existent directory name
                subprocess.run(cmd, cwd=ds.repo.path)
            else:
                yield get_status_dict(
                    status='error',
                    message="initremote failed.\nstdout: %s\nstderr: %s"
                    % (e.stdout, e.stderr),
                    **res_kwargs)
                return

        # 1. create remote object store:
        # Note: All it actually takes is to trigger the special
        # remote's `prepare` method once.
        # ATM trying to achieve that by invoking a minimal fsck.
        # TODO: - It's probably faster to actually talk to the special
        #         remote (i.e. pretending to be annex and use
        #         the protocol to send PREPARE)
        #       - Alternatively we can create the remote directory and
        #         ria version file directly, but this means
        #         code duplication that then needs to be kept in sync
        #         with ria-remote implementation.
        #       - this leads to the third option: Have that creation
        #         routine importable and callable from
        #         ria-remote package without the need to actually
        #         instantiate a RIARemote object
        lgr.debug("initializing object store")
        ds.repo.fsck(remote=ria_remote_name, fast=True,
                     annex_options=['--exclude=*/*'])
    else:
        # with no special remote we currently need to create the
        # required directories
        # TODO: This should be cleaner once we have access to the
        #       special remote's RemoteIO classes without
        #       talking via annex
        if ssh_host:
            try:
                # only existence matters; output is irrelevant
                ssh('test -e {repo}'.format(
                    repo=quote_cmdlinearg(str(repo_path))))
                exists = True
            except CommandError:
                exists = False
            if exists:
                if existing == 'skip':
                    # 1. not rendered by default
                    # 2. message doesn't show up in ultimate result
                    #    record as shown by -f json_pp
                    yield get_status_dict(
                        status='notneeded',
                        message="Skipped on existing remote "
                                "directory {}".format(repo_path),
                        **res_kwargs)
                    skip = True
                elif existing in ['error', 'reconfigure']:
                    yield get_status_dict(
                        status='error',
                        message="remote directory {} already "
                                "exists.".format(repo_path),
                        **res_kwargs)
                    return
                elif existing == 'replace':
                    # make writable before wiping the target directory
                    ssh('chmod u+w -R {}'.format(
                        quote_cmdlinearg(str(repo_path))))
                    ssh('rm -rf {}'.format(quote_cmdlinearg(str(repo_path))))
            if not skip:
                ssh('mkdir -p {}'.format(quote_cmdlinearg(str(repo_path))))
        else:
            if repo_path.exists():
                if existing == 'skip':
                    skip = True
                elif existing in ['error', 'reconfigure']:
                    yield get_status_dict(
                        status='error',
                        message="remote directory {} already "
                                "exists.".format(repo_path),
                        **res_kwargs)
                    return
                elif existing == 'replace':
                    rmtree(repo_path)
            if not skip:
                repo_path.mkdir(parents=True)

    # Note, that this could have changed since last tested due to existing
    # remote dir
    if skip:
        return

    # 2. create a bare repository in-store:

    lgr.debug("init bare repository")
    # TODO: we should prob. check whether it's there already. How?
    # Note: like the special remote itself, we assume local FS if no
    # SSH host is specified
    disabled_hook = repo_path / 'hooks' / 'post-update.sample'
    enabled_hook = repo_path / 'hooks' / 'post-update'
    if group:
        chgrp_cmd = "chgrp -R {} {}".format(quote_cmdlinearg(str(group)),
                                            quote_cmdlinearg(str(repo_path)))
    if ssh_host:
        ssh('cd {rootdir} && git init --bare{shared}'.format(
            rootdir=quote_cmdlinearg(str(repo_path)),
            shared=" --shared='{}'".format(quote_cmdlinearg(shared))
            if shared else ''))
        if post_update_hook:
            ssh('mv {} {}'.format(quote_cmdlinearg(str(disabled_hook)),
                                  quote_cmdlinearg(str(enabled_hook))))
        if group:
            # Either repository existed before or a new directory was
            # created for it, set its group to a desired one if was
            # provided with the same chgrp
            ssh(chgrp_cmd)
    else:
        # BUG FIX: GitRepo's `shared` parameter expects the value for
        # `git init --shared=<value>` (or None), not a pre-built,
        # pre-quoted command line fragment as was passed before
        GitRepo(repo_path, create=True, bare=True,
                shared=shared if shared else None)
        if post_update_hook:
            disabled_hook.rename(enabled_hook)
        if group:
            # BUG FIX: the previous code passed the whole shell string
            # `chgrp_cmd` to subprocess.run() without shell=True (which
            # treats it as a single program name and fails) and
            # shell-quoted `cwd`, which must be a plain path
            subprocess.run(['chgrp', '-R', str(group), str(repo_path)],
                           cwd=ds.path)

    # add a git remote to the bare repository
    # Note: needs annex-ignore! Otherwise we might push into default
    # annex/object tree instead of directory type tree with dirhash
    # lower. This in turn would be an issue, if we want to pack the
    # entire thing into an archive. Special remote will then not be
    # able to access content in the "wrong" place within the archive
    lgr.debug("set up git remote")
    # TODO:
    # - This sibings call results in "[WARNING] Failed to determine
    #   if datastore carries annex."
    #   (see https://github.com/datalad/datalad/issues/4028)
    #   => for now have annex-ignore configured before. Evtl. Allow
    #      configure/add to include that option
    # - additionally there's
    #   https://github.com/datalad/datalad/issues/3989,
    #   where datalad-siblings might hang forever
    if name in ds_siblings:
        # otherwise we should have skipped or failed before
        assert existing in ['replace', 'reconfigure']
    ds.config.set("remote.{}.annex-ignore".format(name), value="true",
                  where="local")
    ds.siblings(
        'configure',
        name=name,
        url=git_url if ssh_host else str(repo_path),
        recursive=False,
        # Note, that this should be None if ria_remote was not set
        publish_depends=ria_remote_name,
        result_renderer=None,
        # Note, that otherwise a subsequent publish will report
        # "notneeded".
        fetch=True)
    yield get_status_dict(
        status='ok',
        **res_kwargs,
    )