def _install_subds_from_flexible_source(ds, sm, **kwargs):
    """Tries to obtain a given subdataset from several meaningful locations

    Parameters
    ----------
    ds : Dataset
      Parent dataset of to-be-installed subdataset.
    sm : dict
      Submodule record as produced by `subdatasets()`.
    **kwargs
      Passed onto clone()
    """
    sm_path = op.relpath(sm['path'], start=sm['parentds'])
    # compose a list of candidate clone URLs
    clone_urls = _get_flexible_source_candidates_for_submodule(ds, sm)

    # prevent inevitable exception from `clone`
    dest_path = op.join(ds.path, sm_path)
    clone_urls_ = [src['url'] for src in clone_urls
                   if src['url'] != dest_path]

    if not clone_urls:
        # yield error
        yield get_status_dict(
            action='install',
            ds=ds,
            status='error',
            message=("Have got no candidates to install subdataset %s from.",
                     sm_path),
            logger=lgr,
        )
        return

    for res in clone_dataset(
            clone_urls_,
            Dataset(dest_path),
            cfg=ds.config,
            **kwargs):
        # make sure to fix a detached HEAD before yielding the install
        # success result; resetting the branch afterwards would undo any
        # change made to the repo in response to that result
        if res.get('action', None) == 'install' and \
                res.get('status', None) == 'ok' and \
                res.get('type', None) == 'dataset' and \
                res.get('path', None) == dest_path:
            _fixup_submodule_dotgit_setup(ds, sm_path)

            target_commit = sm['gitshasum']
            lgr.debug(
                "Update cloned subdataset {0} in parent".format(dest_path))
            section_name = 'submodule.{}'.format(sm['gitmodule_name'])
            # do not use `git submodule update --init`, it would make calls
            # to git-config that do not obey datalad inter-process locks for
            # modifying .git/config
            sub = GitRepo(res['path'])
            # record what branch we were on right after the clone
            # TODO instead of the active branch, this should first consider
            # a configured branch in the submodule record of the superdataset
            sub_orig_branch = sub.get_active_branch()
            # if we are on a branch this hexsha will be the tip of that branch
            sub_orig_hexsha = sub.get_hexsha()
            if sub_orig_hexsha != target_commit:
                # make sure we have the desired commit locally
                # expensive and possibly error-prone fetch conditional on
                # cheap local check
                if not sub.commit_exists(target_commit):
                    try:
                        sub.fetch(remote='origin', refspec=target_commit)
                    except CommandError:
                        pass
                    # instead of inspecting the fetch results for the ways
                    # in which it could have failed to produce the desired
                    # outcome, verify the presence of the commit directly;
                    # we are in expensive-land already anyway
                    if not sub.commit_exists(target_commit):
                        res.update(
                            status='error',
                            message=('Target commit %s does not exist in the '
                                     'clone, and fetching that commit from '
                                     'origin failed',
                                     target_commit[:8]),
                        )
                        yield res
                        # there is nothing we can do about this
                        # MIH thinks that removing the clone is not needed,
                        # as a likely next step will have to be a manual
                        # recovery intervention and not another blind attempt
                        continue
                # checkout the desired commit
                sub.call_git(['checkout', target_commit])
                # did we detach?
                # XXX: this is a less generic variant of a part of
                # GitRepo.update_submodule(). It makes use of already
                # available information, trusts the existence of the just
                # cloned repo, and avoids (redoing) some safety checks
                if sub_orig_branch and not sub.get_active_branch():
                    # check whether the target commit is an ancestor of the
                    # original branch tip
                    lgr.debug(
                        "Detached HEAD after updating submodule %s "
                        "(original branch: %s)", sub, sub_orig_branch)
                    if sub.get_merge_base(
                            [sub_orig_hexsha, target_commit]) == target_commit:
                        # TODO: config option?
                        # MIH: there is no real need here. IMHO this should
                        # all not happen, unless the submodule record has a
                        # branch configured. And Datalad should leave such a
                        # record, when a submodule is registered.

                        # we assume the target_commit to be from the same
                        # branch, because it is an ancestor -- update that
                        # original branch to point to the target_commit, and
                        # update HEAD to point to that location -- this
                        # readies the subdataset for further modification
                        lgr.info(
                            "Reset subdataset branch '%s' to %s (from %s) to "
                            "avoid a detached HEAD",
                            sub_orig_branch, target_commit[:8],
                            sub_orig_hexsha[:8])
                        branch_ref = 'refs/heads/%s' % sub_orig_branch
                        sub.update_ref(branch_ref, target_commit)
                        sub.update_ref('HEAD', branch_ref, symbolic=True)
                    else:
                        lgr.warning(
                            "%s has a detached HEAD, because the recorded "
                            "subdataset state %s has no unique ancestor with "
                            "branch '%s'",
                            sub, target_commit[:8], sub_orig_branch)

            # register the submodule as "active" in the superdataset
            ds.config.set(
                '{}.active'.format(section_name),
                'true',
                reload=False, force=True, where='local')
            ds.config.set(
                '{}.url'.format(section_name),
                # record the actual source URL of the successful clone
                # and not a funky prediction based on the parent ds
                # like ds.repo.update_submodule() would do (does not
                # accept a URL)
                res['source']['giturl'],
                reload=True, force=True, where='local')
        yield res

    subds = Dataset(dest_path)
    if not subds.is_installed():
        lgr.debug('Desired subdataset %s did not materialize, stopping',
                  subds)
        return

    # check whether clone URL generators were involved
    cand_cfg = [rec for rec in clone_urls if rec.get('from_config', False)]
    if cand_cfg:
        # get a handle on the configuration that is specified in the
        # dataset itself (local and dataset)
        super_cfg = ConfigManager(dataset=ds, source='dataset-local')
        need_reload = False
        for rec in cand_cfg:
            # check whether any of this configuration originated from the
            # superdataset. if so, inherit the config in the new subdataset
            # clone. if not, keep things clean in order to be able to move
            # with any outside configuration change
            for c in ('datalad.get.subdataset-source-candidate-{}{}'.format(
                          rec['cost'], rec['name']),
                      'datalad.get.subdataset-source-candidate-{}'.format(
                          rec['name'])):
                if c in super_cfg.keys():
                    subds.config.set(
                        c, super_cfg.get(c), where='local', reload=False)
                    need_reload = True
                    break
        if need_reload:
            subds.config.reload(force=True)
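# Usage sketch (an illustration, not part of the original module): the
# helper above is a generator of result records, so a caller must drain it.
# Assuming a submodule record `sm` as produced by `subdatasets()` on the
# parent dataset `ds`, and with the `reckless` keyword shown merely as an
# example of what gets forwarded to clone(), consumption could look like:
#
#     for res in _install_subds_from_flexible_source(ds, sm, reckless=None):
#         if res.get('status') == 'error':
#             lgr.warning("could not install %s: %s",
#                         res.get('path'), res.get('message'))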