def __call__(
        action='query',
        dataset=None,
        name=None,
        url=None,
        pushurl=None,
        description=None,
        # TODO consider true, for now like add_sibling
        fetch=False,
        as_common_datasrc=None,
        publish_depends=None,
        publish_by_default=None,
        annex_wanted=None,
        annex_required=None,
        annex_group=None,
        annex_groupwanted=None,
        inherit=False,
        get_annex_info=True,
        recursive=False,
        recursion_limit=None):
    """Query or manipulate sibling (remote) configuration of a dataset.

    Dispatches `action` to a per-action worker (query/add/configure/
    remove/enable). Each worker operates on exactly one dataset; with
    `recursive` enabled the same worker is invoked again for every
    fulfilled subdataset, with URLs adjusted per subdataset layout.

    This is a generator: it yields the result records produced by the
    workers (presumably datalad result dicts -- the exact schema is
    determined by the workers, which are defined elsewhere).

    Raises
    ------
    InsufficientArgumentsError
        If `annex_groupwanted` is given without `annex_group`.
    """
    # TODO: Detect malformed URL and fail?
    # XXX possibly fail if fetch is False and as_common_datasrc

    # groupwanted is an annex setting scoped to a group -- meaningless
    # without one
    if annex_groupwanted and not annex_group:
        raise InsufficientArgumentsError(
            "To set groupwanted, you need to provide annex_group option")

    # TODO catch invalid action specified
    # NOTE(review): an unknown `action` currently surfaces as a raw
    # KeyError on the lookup below
    action_worker_map = {
        'query': _query_remotes,
        'add': _add_remote,
        'configure': _configure_remote,
        'remove': _remove_remote,
        'enable': _enable_remote,
    }
    # all worker strictly operate on a single dataset
    # anything that deals with hierarchies and/or dataset
    # relationships in general should be dealt with in here
    # at the top-level and vice versa
    worker = action_worker_map[action]

    dataset = require_dataset(
        dataset, check_installed=False, purpose='sibling configuration')
    refds_path = dataset.path

    # common result-record keyword args passed through to every worker call
    res_kwargs = dict(refds=refds_path, logger=lgr)

    # dataset directory name is used as the %NAME substitution base for URLs
    ds_name = basename(dataset.path)

    # do not form single list of datasets (with recursion results) to
    # give fastest possible response, for the precise of a long-all
    # function call
    ds = dataset
    for r in worker(
            # always copy signature to below to avoid bugs!
            ds, name,
            ds.repo.get_remotes(),
            # for top-level dataset there is no layout questions
            _mangle_urls(url, ds_name),
            _mangle_urls(pushurl, ds_name),
            fetch, description,
            as_common_datasrc, publish_depends, publish_by_default,
            annex_wanted, annex_required, annex_group, annex_groupwanted,
            inherit, get_annex_info,
            **res_kwargs):
        yield r
    if not recursive:
        return

    # do we have instructions to register siblings with some alternative
    # layout? (a literal %NAME placeholder in the URL means "mangle per
    # dataset name" instead of mirroring the local directory structure)
    replicate_local_structure = url and "%NAME" not in url

    for subds in dataset.subdatasets(
            fulfilled=True,
            recursive=recursive, recursion_limit=recursion_limit,
            result_xfm='datasets'):
        # subdataset location relative to the reference dataset
        subds_name = relpath(subds.path, start=dataset.path)
        if replicate_local_structure:
            # mirror the local hierarchy on the remote side
            subds_url = slash_join(url, subds_name)
            subds_pushurl = slash_join(pushurl, subds_name)
        else:
            # substitute %NAME with "<dsname>/<subpath>"
            subds_url = \
                _mangle_urls(url, '/'.join([ds_name, subds_name]))
            subds_pushurl = \
                _mangle_urls(pushurl, '/'.join([ds_name, subds_name]))
        for r in worker(
                # always copy signature from above to avoid bugs
                subds, name,
                subds.repo.get_remotes(),
                subds_url,
                subds_pushurl,
                fetch,
                description,
                as_common_datasrc, publish_depends, publish_by_default,
                annex_wanted, annex_required, annex_group, annex_groupwanted,
                inherit, get_annex_info,
                **res_kwargs):
            yield r
def __call__(sshurl, name=None, target_dir=None,
             target_url=None, target_pushurl=None,
             dataset=None,
             recursive=False,
             recursion_limit=None,
             existing='error',
             shared=None,
             group=None,
             ui=False,
             as_common_datasrc=None,
             publish_by_default=None,
             publish_depends=None,
             annex_wanted=None, annex_group=None, annex_groupwanted=None,
             inherit=False,
             since=None):
    """Create a sibling (remote repository) for a dataset via SSH.

    Generator yielding per-dataset result records ("annotated paths").
    Overall flow: resolve/validate `sshurl` (possibly inheriting it from
    a superdataset's remote when `inherit` is set), collect all datasets
    to process (recursively, optionally limited by `since`), create each
    sibling via ``_create_dataset_sibling``, optionally deploy the web
    UI on the top-level sibling, and finally run the remotes'
    post-update hooks in depth-first order.

    Raises
    ------
    InsufficientArgumentsError
        If neither `sshurl` nor `inherit` is provided.
    ValueError
        For inconsistent argument combinations or an unsupported URL.
    MissingExternalDependency
        If git-annex is not available on the remote system.
    """
    #
    # nothing without a base dataset
    #
    ds = require_dataset(dataset, check_installed=True,
                         purpose='creating a sibling')
    refds_path = ds.path

    #
    # all checks that are possible before we start parsing the dataset
    #

    # possibly use sshurl to get the name in case if not specified
    if not sshurl:
        if not inherit:
            raise InsufficientArgumentsError(
                "needs at least an SSH URL, if no inherit option"
            )
        if name is None:
            raise ValueError(
                "Neither SSH URL, nor the name of sibling to inherit from "
                "was specified"
            )
        # It might well be that we already have this remote setup
        try:
            sshurl = CreateSibling._get_remote_url(ds, name)
        except Exception as exc:
            lgr.debug('%s does not know about url for %s: %s', ds, name,
                      exc_str(exc))
    elif inherit:
        raise ValueError(
            "For now, for clarity not allowing specifying a custom sshurl "
            "while inheriting settings"
        )
        # may be could be safely dropped -- still WiP

    if not sshurl:
        # TODO: may be more back up before _prep?
        # inherit the URL from the superdataset's same-named sibling
        super_ds = ds.get_superdataset()
        if not super_ds:
            raise ValueError(
                "Could not determine super dataset for %s to inherit URL"
                % ds
            )
        super_url = CreateSibling._get_remote_url(super_ds, name)
        # for now assuming hierarchical setup
        # (TODO: to be able to destinguish between the two, probably
        # needs storing datalad.*.target_dir to have %RELNAME in there)
        sshurl = slash_join(super_url, relpath(ds.path, super_ds.path))

    # check the login URL
    sshri = RI(sshurl)
    if not is_ssh(sshri):
        raise ValueError(
            "Unsupported SSH URL: '{0}', "
            "use ssh://host/path or host:path syntax".format(sshurl))

    if not name:
        # use the hostname as default remote name
        name = sshri.hostname
        lgr.debug(
            "No sibling name given, use URL hostname '%s' as sibling name",
            name)

    if since == '':
        # consider creating siblings only since the point of
        # the last update
        # XXX here we assume one to one mapping of names from local branches
        # to the remote
        active_branch = ds.repo.get_active_branch()
        since = '%s/%s' % (name, active_branch)

    #
    # parse the base dataset to find all subdatasets that need processing
    #
    to_process = []
    for ap in AnnotatePaths.__call__(
            dataset=refds_path,
            # only a single path!
            path=refds_path,
            recursive=recursive,
            recursion_limit=recursion_limit,
            action='create_sibling',
            # both next should not happen anyways
            unavailable_path_status='impossible',
            nondataset_path_status='error',
            modified=since,
            return_type='generator',
            on_failure='ignore'):
        if ap.get('status', None):
            # this is done
            yield ap
            continue
        if ap.get('type', None) != 'dataset' \
                or ap.get('state', None) == 'absent':
            # this can happen when there is `since`, but we have no
            # use for anything but datasets here
            continue
        checkds_remotes = Dataset(ap['path']).repo.get_remotes() \
            if ap.get('state', None) != 'absent' \
            else []
        if publish_depends:
            # make sure dependencies are valid
            # TODO: inherit -- we might want to automagically create
            # those dependents as well???
            unknown_deps = set(assure_list(publish_depends)).difference(
                checkds_remotes)
            if unknown_deps:
                ap['status'] = 'error'
                ap['message'] = (
                    'unknown sibling(s) specified as publication dependency: %s',
                    unknown_deps)
                yield ap
                continue
        if name in checkds_remotes and existing in ('error', 'skip'):
            ap['status'] = 'error' if existing == 'error' else 'notneeded'
            ap['message'] = (
                "sibling '%s' already configured (specify alternative name, or force "
                "reconfiguration via --existing",
                name)
            yield ap
            continue
        to_process.append(ap)

    if not to_process:
        # we ruled out all possibilities
        # TODO wait for gh-1218 and make better return values
        lgr.info("No datasets qualify for sibling creation. "
                 "Consider different settings for --existing "
                 "or --since if this is unexpected")
        return

    if target_dir is None:
        # default to the path component of the SSH URL, or the remote CWD
        if sshri.path:
            target_dir = sshri.path
        else:
            target_dir = '.'

    # TODO: centralize and generalize template symbol handling
    replicate_local_structure = "%RELNAME" not in target_dir

    # request ssh connection:
    lgr.info("Connecting ...")
    assert(sshurl is not None)  # delayed anal verification
    ssh = ssh_manager.get_connection(sshurl)
    if not ssh.get_annex_version():
        raise MissingExternalDependency(
            'git-annex',
            msg='on the remote system')

    #
    # all checks done and we have a connection, now do something
    #

    # loop over all datasets, ordered from top to bottom to make test
    # below valid (existing directories would cause the machinery to halt)
    # But we need to run post-update hook in depth-first fashion, so
    # would only collect first and then run (see gh #790)
    yielded = set()
    remote_repos_to_run_hook_for = []
    for currentds_ap in \
            sorted(to_process, key=lambda x: x['path'].count('/')):
        current_ds = Dataset(currentds_ap['path'])
        path = _create_dataset_sibling(
            name,
            current_ds,
            ds.path,
            ssh,
            replicate_local_structure,
            sshri,
            target_dir,
            target_url,
            target_pushurl,
            existing,
            shared,
            group,
            publish_depends,
            publish_by_default,
            ui,
            as_common_datasrc,
            annex_wanted,
            annex_group,
            annex_groupwanted,
            inherit
        )
        if not path:
            # nothing new was created
            # TODO is 'notneeded' appropriate in this case?
            currentds_ap['status'] = 'notneeded'
            # TODO explain status in 'message'
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        remote_repos_to_run_hook_for.append((path, currentds_ap))

        # publish web-interface to root dataset on publication server
        if current_ds.path == ds.path and ui:
            # FIX: use lazy %-style logging args (was eager string
            # interpolation via '%'), consistent with every other log
            # call in this function
            lgr.info("Uploading web interface to %s", path)
            try:
                CreateSibling.upload_web_interface(path, ssh, shared, ui)
            except CommandError as e:
                currentds_ap['status'] = 'error'
                currentds_ap['message'] = (
                    "failed to push web interface to the remote datalad repository (%s)",
                    exc_str(e))
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue

    # in reverse order would be depth first
    lgr.info("Running post-update hooks in all created siblings")
    # TODO: add progressbar
    for path, currentds_ap in remote_repos_to_run_hook_for[::-1]:
        # Trigger the hook
        lgr.debug("Running hook for %s (if exists and executable)", path)
        try:
            ssh("cd {} "
                "&& ( [ -x hooks/post-update ] && hooks/post-update || : )"
                "".format(sh_quote(_path_(path, ".git"))))
        except CommandError as e:
            currentds_ap['status'] = 'error'
            currentds_ap['message'] = (
                "failed to run post-update hook under remote path %s (%s)",
                path, exc_str(e))
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        # FIX: idiomatic `not in` (was `not x in y`)
        if currentds_ap['path'] not in yielded:
            # if we were silent until now everything is just splendid
            currentds_ap['status'] = 'ok'
            yield currentds_ap
def __call__(sshurl,
             *,
             name=None,
             target_dir=None,
             target_url=None,
             target_pushurl=None,
             dataset=None,
             recursive=False,
             recursion_limit=None,
             existing='error',
             shared=None,
             group=None,
             ui=False,
             as_common_datasrc=None,
             publish_by_default=None,
             publish_depends=None,
             annex_wanted=None,
             annex_group=None,
             annex_groupwanted=None,
             inherit=False,
             since=None):
    """Create a sibling for a dataset via SSH or on a local path.

    Generator yielding per-dataset result records. Compared to older
    revisions, this variant also supports plain-path (non-SSH) targets
    through a local runner adapter, and relies on ``diff_dataset`` to
    discover subdatasets needing processing.

    Raises
    ------
    InsufficientArgumentsError
        If neither `sshurl` nor `inherit` is provided.
    ValueError
        For inconsistent argument combinations or an unsupported URL.
    RuntimeError
        If `ui` is requested but the `datalad_deprecated` extension
        providing the web UI is not installed.
    MissingExternalDependency
        If git-annex is not available on the target machine.
    """
    if ui:
        # the webui has been moved to the deprecated extension
        # fail early, before any remote work, if it is not importable
        try:
            from datalad_deprecated.sibling_webui \
                import upload_web_interface
        except Exception as e:
            # we could just test for ModuleNotFoundError (which should be
            # all that would happen with PY3.6+, but be a little more robust
            # and use the pattern from duecredit
            if type(e).__name__ not in ('ImportError', 'ModuleNotFoundError'):
                lgr.error(
                    "Failed to import datalad_deprecated.sibling_webui "
                    "due to %s", str(e))
            raise RuntimeError(
                "The DataLad web UI has been moved to an extension "
                "package. Please install the Python package "
                "`datalad_deprecated` to be able to deploy it.")

    # push uses '^' to annotate the previous pushed committish, and None for default
    # behavior. '' was/is (to be deprecated) used in `publish` and 'create-sibling'.
    # Alert user about the mistake
    if since == '':
        # deprecation was added prior 0.16.0
        import warnings
        warnings.warn("'since' should point to commitish or use '^'.",
                      DeprecationWarning)
        since = '^'

    #
    # nothing without a base dataset
    #
    ds = require_dataset(dataset, check_installed=True,
                         purpose='create sibling(s)')
    refds_path = ds.path

    #
    # all checks that are possible before we start parsing the dataset
    #
    if since and not recursive:
        raise ValueError("The use of 'since' requires 'recursive'")

    # possibly use sshurl to get the name in case if not specified
    if not sshurl:
        if not inherit:
            raise InsufficientArgumentsError(
                "needs at least an SSH URL, if no inherit option")
        if name is None:
            raise ValueError(
                "Neither SSH URL, nor the name of sibling to inherit from "
                "was specified")
        # It might well be that we already have this remote setup
        try:
            sshurl = CreateSibling._get_remote_url(ds, name)
        except Exception as exc:
            ce = CapturedException(exc)
            lgr.debug('%s does not know about url for %s: %s', ds, name, ce)
    elif inherit:
        raise ValueError(
            "For now, for clarity not allowing specifying a custom sshurl "
            "while inheriting settings")
        # may be could be safely dropped -- still WiP

    if not sshurl:
        # TODO: may be more back up before _prep?
        # inherit the URL from the superdataset's same-named sibling
        super_ds = ds.get_superdataset()
        if not super_ds:
            raise ValueError(
                "Could not determine super dataset for %s to inherit URL"
                % ds)
        super_url = CreateSibling._get_remote_url(super_ds, name)
        # for now assuming hierarchical setup
        # (TODO: to be able to distinguish between the two, probably
        # needs storing datalad.*.target_dir to have %RELNAME in there)
        sshurl = slash_join(super_url, relpath(refds_path, super_ds.path))

    # check the login URL
    sibling_ri = RI(sshurl)
    ssh_sibling = is_ssh(sibling_ri)
    if not (ssh_sibling or isinstance(sibling_ri, PathRI)):
        raise ValueError(
            "Unsupported SSH URL or path: '{0}', "
            "use ssh://host/path, host:path or path syntax".format(sshurl))

    if not name:
        # default sibling name: URL hostname for SSH, 'local' for paths
        name = sibling_ri.hostname if ssh_sibling else "local"
        lgr.info(
            "No sibling name given. Using %s'%s' as sibling name",
            "URL hostname " if ssh_sibling else "",
            name)
    if since == '^':
        # consider creating siblings only since the point of
        # the last update
        # XXX here we assume one to one mapping of names from local branches
        # to the remote
        active_branch = ds.repo.get_active_branch()
        since = '%s/%s' % (name, active_branch)

    to_process = []
    if recursive:
        #
        # parse the base dataset to find all subdatasets that need processing
        #
        cand_ds = [
            Dataset(r['path'])
            for r in diff_dataset(
                ds,
                fr=since,
                to='HEAD',
                # w/o False we might not follow into new subdatasets
                # which do not have that remote yet setup,
                # see https://github.com/datalad/datalad/issues/6596
                constant_refs=False,
                # save cycles, we are only looking for datasets
                annex=None,
                untracked='no',
                recursive=True,
                datasets_only=True,
            )
            # not installed subdatasets would be 'clean' so we would skip them
            if r.get('type') == 'dataset' and r.get('state', None) != 'clean'
        ]
        if not since:
            # not only subdatasets
            cand_ds = [ds] + cand_ds
    else:
        # only the current ds
        cand_ds = [ds]
    # check remotes setup() for all candidate datasets before doing any
    # remote work, yielding early errors for invalid configurations
    for d in cand_ds:
        d_repo = d.repo
        if d_repo is None:
            # not installed -- nothing to configure
            continue
        checkds_remotes = d.repo.get_remotes()
        # result-record template for this dataset
        res = dict(
            action='create_sibling',
            path=d.path,
            type='dataset',
        )
        if publish_depends:
            # make sure dependencies are valid
            # TODO: inherit -- we might want to automagically create
            # those dependents as well???
            unknown_deps = set(
                ensure_list(publish_depends)).difference(checkds_remotes)
            if unknown_deps:
                yield dict(
                    res,
                    status='error',
                    message=('unknown sibling(s) specified as publication '
                             'dependency: %s', unknown_deps),
                )
                continue
        if name in checkds_remotes and existing in ('error', 'skip'):
            yield dict(
                res,
                sibling_name=name,
                status='error' if existing == 'error' else 'notneeded',
                message=(
                    "sibling '%s' already configured (specify alternative "
                    "name, or force reconfiguration via --existing",
                    name),
            )
            continue
        to_process.append(res)

    if not to_process:
        # we ruled out all possibilities
        # TODO wait for gh-1218 and make better return values
        lgr.info("No datasets qualify for sibling creation. "
                 "Consider different settings for --existing "
                 "or --since if this is unexpected")
        return

    if ssh_sibling:
        # request ssh connection:
        lgr.info("Connecting ...")
        shell = ssh_manager.get_connection(sshurl)
    else:
        # non-SSH target: run commands locally through an adapter with
        # a shell-like call interface
        shell = _RunnerAdapter()
        sibling_ri.path = str(resolve_path(sibling_ri.path, dataset))
        if target_dir:
            target_dir = opj(sibling_ri.path, target_dir)

    if target_dir is None:
        # default to the path component of the URL, or the remote CWD
        if sibling_ri.path:
            target_dir = sibling_ri.path
        else:
            target_dir = '.'

    # TODO: centralize and generalize template symbol handling
    replicate_local_structure = "%RELNAME" not in target_dir

    if not shell.get_annex_version():
        raise MissingExternalDependency(
            'git-annex',
            msg="It's required on the {} machine to create a sibling".
            format('remote' if ssh_sibling else 'local'))

    #
    # all checks done and we have a connection, now do something
    #

    # loop over all datasets, ordered from top to bottom to make test
    # below valid (existing directories would cause the machinery to halt)
    # But we need to run post-update hook in depth-first fashion, so
    # would only collect first and then run (see gh #790)
    yielded = set()
    remote_repos_to_run_hook_for = []
    for currentds_ap in \
            sorted(to_process, key=lambda x: x['path'].count('/')):
        current_ds = Dataset(currentds_ap['path'])
        path = _create_dataset_sibling(
            name,
            current_ds,
            refds_path,
            shell,
            replicate_local_structure,
            sibling_ri,
            target_dir,
            target_url,
            target_pushurl,
            existing,
            shared,
            group,
            publish_depends,
            publish_by_default,
            ui,
            as_common_datasrc,
            annex_wanted,
            annex_group,
            annex_groupwanted,
            inherit)
        currentds_ap["sibling_name"] = name
        if not path:
            # nothing new was created
            # TODO is 'notneeded' appropriate in this case?
            currentds_ap['status'] = 'notneeded'
            # TODO explain status in 'message'
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        remote_repos_to_run_hook_for.append((path, currentds_ap))

        # publish web-interface to root dataset on publication server
        if current_ds.path == refds_path and ui:
            # import was already validated at the top of this function
            from datalad_deprecated.sibling_webui import upload_web_interface
            lgr.info("Uploading web interface to %s", path)
            try:
                upload_web_interface(path, shell, shared, ui)
            except CommandError as e:
                ce = CapturedException(e)
                currentds_ap['status'] = 'error'
                currentds_ap['message'] = (
                    "failed to push web interface to the remote datalad repository (%s)",
                    ce)
                currentds_ap['exception'] = ce
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue

    # in reverse order would be depth first
    lgr.info("Running post-update hooks in all created siblings")
    # TODO: add progressbar
    for path, currentds_ap in remote_repos_to_run_hook_for[::-1]:
        # Trigger the hook
        lgr.debug("Running hook for %s (if exists and executable)", path)
        try:
            shell(
                "cd {} "
                "&& ( [ -x hooks/post-update ] && hooks/post-update || true )"
                "".format(sh_quote(_path_(path, ".git"))))
        except CommandError as e:
            ce = CapturedException(e)
            currentds_ap['status'] = 'error'
            currentds_ap['message'] = (
                "failed to run post-update hook under remote path %s (%s)",
                path, ce)
            currentds_ap['exception'] = ce
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        if not currentds_ap['path'] in yielded:
            # if we were silent until now everything is just splendid
            currentds_ap['status'] = 'ok'
            yield currentds_ap
def __call__(
        action='query',
        dataset=None,
        name=None,
        url=None,
        pushurl=None,
        description=None,
        # TODO consider true, for now like add_sibling
        fetch=False,
        as_common_datasrc=None,
        publish_depends=None,
        publish_by_default=None,
        annex_wanted=None,
        annex_required=None,
        annex_group=None,
        annex_groupwanted=None,
        inherit=False,
        get_annex_info=True,
        recursive=False,
        recursion_limit=None):
    """Query or manipulate sibling (remote) configuration of a dataset.

    Dispatches `action` to a per-action worker (query/add/configure/
    remove/enable). Workers operate on a single dataset; with
    `recursive` enabled the call is repeated for each fulfilled
    subdataset with per-subdataset URLs. Generator yielding the result
    records produced by the workers.

    Raises
    ------
    InsufficientArgumentsError
        If `annex_groupwanted` is given without `annex_group`.
    """
    # TODO: Detect malformed URL and fail?
    # XXX possibly fail if fetch is False and as_common_datasrc

    # groupwanted is an annex setting scoped to a group -- meaningless
    # without one
    if annex_groupwanted and not annex_group:
        raise InsufficientArgumentsError(
            "To set groupwanted, you need to provide annex_group option")

    # TODO catch invalid action specified
    # NOTE(review): an unknown `action` currently surfaces as a raw
    # KeyError on the lookup below
    action_worker_map = {
        'query': _query_remotes,
        'add': _add_remote,
        'configure': _configure_remote,
        'remove': _remove_remote,
        'enable': _enable_remote,
    }
    # all worker strictly operate on a single dataset
    # anything that deals with hierarchies and/or dataset
    # relationships in general should be dealt with in here
    # at the top-level and vice versa
    worker = action_worker_map[action]

    dataset = require_dataset(
        dataset, check_installed=False, purpose='sibling configuration')
    refds_path = dataset.path

    # common result-record keyword args passed through to every worker call
    res_kwargs = dict(refds=refds_path, logger=lgr)

    # dataset directory name is used as the %NAME substitution base for URLs
    ds_name = basename(dataset.path)

    # do not form single list of datasets (with recursion results) to
    # give fastest possible response, for the precise of a long-all
    # function call
    ds = dataset
    for r in worker(
            # always copy signature to below to avoid bugs!
            ds, name,
            ds.repo.get_remotes(),
            # for top-level dataset there is no layout questions
            _mangle_urls(url, ds_name),
            _mangle_urls(pushurl, ds_name),
            fetch, description,
            as_common_datasrc, publish_depends, publish_by_default,
            annex_wanted, annex_required, annex_group, annex_groupwanted,
            inherit, get_annex_info,
            **res_kwargs):
        yield r
    if not recursive:
        return

    # do we have instructions to register siblings with some alternative
    # layout? (a literal %NAME placeholder in the URL means "mangle per
    # dataset name" instead of mirroring the local directory structure)
    replicate_local_structure = url and "%NAME" not in url

    for subds in dataset.subdatasets(
            fulfilled=True,
            recursive=recursive, recursion_limit=recursion_limit,
            result_xfm='datasets'):
        # subdataset location relative to the reference dataset
        subds_name = relpath(subds.path, start=dataset.path)
        if replicate_local_structure:
            # mirror the local hierarchy on the remote side
            subds_url = slash_join(url, subds_name)
            subds_pushurl = slash_join(pushurl, subds_name)
        else:
            # substitute %NAME with "<dsname>/<subpath>"
            subds_url = \
                _mangle_urls(url, '/'.join([ds_name, subds_name]))
            subds_pushurl = \
                _mangle_urls(pushurl, '/'.join([ds_name, subds_name]))
        for r in worker(
                # always copy signature from above to avoid bugs
                subds, name,
                subds.repo.get_remotes(),
                subds_url,
                subds_pushurl,
                fetch,
                description,
                as_common_datasrc, publish_depends, publish_by_default,
                annex_wanted, annex_required, annex_group, annex_groupwanted,
                inherit, get_annex_info,
                **res_kwargs):
            yield r
def __call__(
        action='query',
        *,
        dataset=None,
        name=None,
        url=None,
        pushurl=None,
        description=None,
        # TODO consider true, for now like add_sibling
        fetch=False,
        as_common_datasrc=None,
        publish_depends=None,
        publish_by_default=None,
        annex_wanted=None,
        annex_required=None,
        annex_group=None,
        annex_groupwanted=None,
        inherit=False,
        get_annex_info=True,
        recursive=False,
        recursion_limit=None):
    """Query or manipulate sibling (remote) configuration of a dataset.

    Keyword-only variant: dispatches `action` to a per-action worker
    (query/add/configure/remove/enable) that receives a shared keyword
    parameter package. Workers operate on one dataset at a time; with
    `recursive` the call is repeated for each present subdataset with
    per-subdataset URLs. Generator yielding worker result records.

    Raises
    ------
    InsufficientArgumentsError
        If `annex_groupwanted` is given without `annex_group`.
    """
    # TODO: Detect malformed URL and fail?
    # XXX possibly fail if fetch is False and as_common_datasrc

    # groupwanted is an annex setting scoped to a group -- meaningless
    # without one
    if annex_groupwanted and not annex_group:
        raise InsufficientArgumentsError(
            "To set groupwanted, you need to provide annex_group option")

    # TODO catch invalid action specified
    # NOTE(review): an unknown `action` currently surfaces as a raw
    # KeyError on the lookup below
    action_worker_map = {
        'query': _query_remotes,
        'add': _add_remote,
        'configure': _configure_remote,
        'remove': _remove_remote,
        'enable': _enable_remote,
    }
    # all worker strictly operate on a single dataset
    # anything that deals with hierarchies and/or dataset
    # relationships in general should be dealt with in here
    # at the top-level and vice versa
    worker = action_worker_map[action]

    ds = require_dataset(
        dataset,
        # it makes no sense to use this command without a dataset
        check_installed=True,
        purpose='configure sibling')
    refds_path = ds.path

    # common result-record keyword args passed through to every worker call
    res_kwargs = dict(refds=refds_path, logger=lgr)

    # dataset directory name is used as the %NAME substitution base for URLs
    ds_name = op.basename(ds.path)

    # do not form single list of datasets (with recursion results) to
    # give fastest possible response, for the precise of a long-all
    # function call
    # minimize expensive calls to .repo
    ds_repo = ds.repo
    # prepare common parameterization package for all worker calls
    worker_kwargs = dict(
        name=name,
        fetch=fetch,
        description=description,
        as_common_datasrc=as_common_datasrc,
        publish_depends=publish_depends,
        publish_by_default=publish_by_default,
        annex_wanted=annex_wanted,
        annex_required=annex_required,
        annex_group=annex_group,
        annex_groupwanted=annex_groupwanted,
        inherit=inherit,
        get_annex_info=get_annex_info,
        res_kwargs=res_kwargs,
    )
    yield from worker(
        ds=ds,
        repo=ds_repo,
        known_remotes=ds_repo.get_remotes(),
        # for top-level dataset there is no layout questions
        url=_mangle_urls(url, ds_name),
        pushurl=_mangle_urls(pushurl, ds_name),
        **worker_kwargs)
    if not recursive:
        return

    # do we have instructions to register siblings with some alternative
    # layout? (a literal %NAME placeholder in the URL means "mangle per
    # dataset name" instead of mirroring the local directory structure)
    replicate_local_structure = url and "%NAME" not in url
    subds_pushurl = None
    for subds in ds.subdatasets(
            state='present',
            recursive=recursive, recursion_limit=recursion_limit,
            result_xfm='datasets'):
        # minimize expensive calls to .repo
        subds_repo = subds.repo
        # subdataset location relative to the reference dataset
        subds_name = op.relpath(subds.path, start=ds.path)
        if replicate_local_structure:
            # mirror the local hierarchy on the remote side
            subds_url = slash_join(url, subds_name)
            if pushurl:
                subds_pushurl = slash_join(pushurl, subds_name)
        else:
            # substitute %NAME with "<dsname>/<subpath>"
            subds_url = \
                _mangle_urls(url, '/'.join([ds_name, subds_name]))
            subds_pushurl = \
                _mangle_urls(pushurl, '/'.join([ds_name, subds_name]))
        yield from worker(
            ds=subds,
            repo=subds_repo,
            known_remotes=subds_repo.get_remotes(),
            url=subds_url,
            pushurl=subds_pushurl,
            **worker_kwargs)