def __call__(
        path=None,
        dataset=None,
        recursive=False,
        check=True,
        if_dirty='save-before'):
    if dataset and not path:
        # act on the whole dataset if nothing else was specified
        path = dataset.path if isinstance(dataset, Dataset) else dataset
    content_by_ds, unavailable_paths = Interface._prep(
        path=path,
        dataset=dataset,
        recursive=recursive)
    if unavailable_paths:
        lgr.warning('ignored non-installed paths: %s', unavailable_paths)
    # upfront sanity and compliance checks
    if path_is_under(content_by_ds.keys()):
        # behave like `rm` and refuse to remove where we are
        raise ValueError(
            "refusing to uninstall current or parent directory")
    # check that we have no top-level datasets and no files to process
    args_ok = True
    for ds_path in content_by_ds:
        ds = Dataset(ds_path)
        paths = content_by_ds[ds_path]
        if ds_path not in paths:
            lgr.error(
                "will not act on files at %s (consider the `drop` command)",
                paths)
            args_ok = False
        if not ds.get_superdataset(
                datalad_only=False,
                topmost=False):
            lgr.error(
                "will not uninstall top-level dataset at %s (consider the `remove` command)",
                ds.path)
            args_ok = False
    if not args_ok:
        raise ValueError(
            'inappropriate arguments, see previous error message(s)')

    handle_dirty_datasets(
        content_by_ds, mode=if_dirty, base=dataset)

    results = []
    # iterate over all datasets, starting at the bottom
    # to deinit contained submodules first
    for ds_path in sorted(content_by_ds, reverse=True):
        ds = Dataset(ds_path)
        paths = content_by_ds[ds_path]
        results.extend(
            # we confirmed the super dataset presence above
            _uninstall_dataset(ds, check=check, has_super=True))
    # there is nothing to save at the end
    return results

def __call__(
        path=None,
        dataset=None,
        recursive=False,
        check=True,
        save=True,
        message=None,
        if_dirty='save-before'):
    res_kwargs = dict(action='remove', logger=lgr)
    if not dataset and not path:
        raise InsufficientArgumentsError(
            "insufficient information for `remove`: requires at least a path or dataset")
    refds_path = Interface.get_refds_path(dataset)
    res_kwargs['refds'] = refds_path
    if refds_path and not path and not GitRepo.is_valid_repo(refds_path):
        # nothing here, nothing to remove
        yield get_status_dict(path=refds_path, status='notneeded', **res_kwargs)
        return
    if refds_path and not path:
        # act on the whole dataset if nothing else was specified
        # TODO i think that would happen automatically in annotation?
        path = refds_path

    to_process = []

    for ap in AnnotatePaths.__call__(
            path=path,
            dataset=refds_path,
            recursive=recursive,
            # we only ever want to discover immediate subdatasets, the rest
            # will happen in `uninstall`
            recursion_limit=1,
            action='remove',
            unavailable_path_status='',
            nondataset_path_status='error',
            return_type='generator',
            on_failure='ignore'):
        if ap.get('status', None):
            # this is done
            yield ap
            continue
        if ap.get('state', None) == 'absent' and \
                ap.get('parentds', None) is None:
            # nothing exists at location, and there is no parent to
            # remove from
            ap['status'] = 'notneeded'
            ap['message'] = "path does not exist and is not in a dataset"
            yield ap
            continue
        if ap.get('raw_input', False) and ap.get('type', None) == 'dataset':
            # make sure dataset sorting yields a dedicated entry for this one
            ap['process_content'] = True
        to_process.append(ap)

    if not to_process:
        # nothing left to do, potentially all errored before
        return

    if path_is_under([ap['path'] for ap in to_process]):
        # behave like `rm` and refuse to remove where we are
        raise ValueError(
            "refusing to uninstall current or parent directory")

    # now sort into datasets so we can process them one by one
    content_by_ds, ds_props, completed, nondataset_paths = \
        annotated2content_by_ds(
            to_process,
            refds_path=refds_path)
    assert(not completed)

    # iterate over all datasets, starting at the bottom
    # to make the removal of dataset content known upstairs
    to_save = []
    # track which submodules we have removed in the process, to avoid
    # failure in case we revisit them due to a subsequent path argument
    subm_removed = []
    for ds_path in sorted(content_by_ds, reverse=True):
        ds = Dataset(ds_path)
        paths = content_by_ds[ds_path]
        to_reporemove = dict()
        # PLAN any dataset that was not raw_input, uninstall (passing recursive flag)
        # if dataset itself is in paths, skip any nondataset
        # sort reverse so we get subdatasets first
        for ap in sorted(paths, key=lambda x: x['path'], reverse=True):
            if ap.get('type', None) == 'dataset':
                # entire dataset needs to go, uninstall if present, pass recursive!
                uninstall_failed = False
                if ap['path'] == refds_path or \
                        (refds_path is None and ap.get('raw_input', False)):
                    # top-level handling, cannot use regular uninstall call, as
                    # it will refuse to uninstall a top-level dataset
                    # and rightfully so, it is really a remove in that case
                    # bypass all the safety by using low-level helper
                    for r in _uninstall_dataset(ds, check=check, has_super=False,
                                                **res_kwargs):
                        if r['status'] in ('impossible', 'error'):
                            # we need to inspect if something went wrong, in order
                            # to prevent failure from removing a non-empty dir below,
                            # but at the same time allow for continued processing
                            uninstall_failed = True
                        r['refds'] = refds_path
                        yield r
                # recheck that it wasn't removed during a previous iteration
                elif ap.get('state', None) != 'absent' and GitRepo.is_valid_repo(ap['path']):
                    # anything that is not the top-level -> regular uninstall
                    # this is for subdatasets of the to-be-removed dataset
                    # we want to simply uninstall them in a regular manner
                    for r in Uninstall.__call__(
                            ap['path'],
                            dataset=refds_path,
                            recursive=recursive,
                            check=check,
                            if_dirty=if_dirty,
                            result_xfm=None, result_filter=None, on_failure='ignore'):
                        if r['status'] in ('impossible', 'error'):
                            # we need to inspect if something went wrong, in order
                            # to prevent failure from removing a non-empty dir below,
                            # but at the same time allow for continued processing
                            uninstall_failed = True
                        yield r
                if not ap.get('raw_input', False):
                    # we only ever want to actually unregister subdatasets that
                    # were given explicitly
                    continue
                if not uninstall_failed and \
                        not ap['path'] in subm_removed and \
                        refds_path and \
                        ap.get('parentds', None) and \
                        not (relpath(ap['path'], start=refds_path).startswith(pardir) or
                             ap['path'] == refds_path) and \
                        ap.get('registered_subds', False):
                    # strip from superdataset, but only if a dataset was given explicitly
                    # as in "remove from this dataset", but not when just a path was given
                    # as in "remove from the filesystem"
                    subds_relpath = relpath(ap['path'], start=ap['parentds'])
                    # remove submodule reference
                    parentds = Dataset(ap['parentds'])
                    # play safe, will fail on dirty
                    parentds.repo.deinit_submodule(ap['path'])
                    # remove now empty submodule link
                    parentds.repo.remove(ap['path'])
                    # make a record that we removed this already, should it be
                    # revisited via another path argument, because we do not
                    # reannotate the paths after every removal
                    subm_removed.append(ap['path'])
                    yield dict(ap, status='ok', **res_kwargs)
                    # need .gitmodules update in parent
                    to_save.append(dict(
                        path=opj(parentds.path, '.gitmodules'),
                        parents=parentds.path,
                        type='file'))
                    # and the removal itself needs to be committed
                    # inform `save` that it is OK that this path
                    # doesn't exist on the filesystem anymore
                    ap['unavailable_path_status'] = ''
                    ap['process_content'] = False
                    to_save.append(ap)
                if not uninstall_failed and exists(ap['path']):
                    # could be an empty dir in case an already uninstalled
                    # subdataset got removed
                    rmdir(ap['path'])
            else:
                # anything that is not a dataset can simply be passed on
                to_reporemove[ap['path']] = ap
        # avoid unnecessary git calls when there is nothing to do
        if to_reporemove:
            if check and hasattr(ds.repo, 'drop'):
                for r in _drop_files(ds, list(to_reporemove), check=True):
                    if r['status'] == 'error':
                        # if drop errored on that path, we can't remove it
                        to_reporemove.pop(r['path'], 'avoidKeyError')
                    yield r
            if to_reporemove:
                for r in ds.repo.remove(list(to_reporemove), r=True):
                    # these were removed, but we still need to save the
                    # removal
                    r_abs = opj(ds.path, r)
                    if r_abs in to_reporemove:
                        ap = to_reporemove[r_abs]
                    else:
                        ap = {'path': r_abs,
                              'parentds': ds.path,
                              'refds': refds_path}
                    ap['unavailable_path_status'] = ''
                    to_save.append(ap)
                    yield get_status_dict(
                        status='ok',
                        path=r,
                        **res_kwargs)

    if not to_save:
        # nothing left to do, potentially all errored before
        return
    if not save:
        lgr.debug('Not calling `save` as instructed')
        return
    for res in Save.__call__(
            path=[ap["path"] for ap in to_save],
            # we might have removed the reference dataset by now, recheck
            dataset=refds_path
            if (refds_path and GitRepo.is_valid_repo(refds_path))
            else None,
            message=message if message else '[DATALAD] removed content',
            return_type='generator',
            result_xfm=None, result_filter=None, on_failure='ignore'):
        yield res

def __call__(
        path=None,
        dataset=None,
        recursive=False,
        check=True,
        if_dirty='save-before'):
    if dataset:
        dataset = require_dataset(
            dataset, check_installed=False, purpose='removal')
        if not dataset.is_installed() and not path:
            # all done already
            return []
        if not path:
            # act on the whole dataset if nothing else was specified
            path = dataset.path if isinstance(dataset, Dataset) else dataset
    content_by_ds, unavailable_paths = Interface._prep(
        path=path,
        dataset=dataset,
        recursive=recursive)
    nonexistent_paths = []
    for p in unavailable_paths:
        # we need to check whether any of these correspond
        # to a known subdataset, and add those to the list of
        # things to be removed
        toppath = get_dataset_root(p)
        if not toppath:
            nonexistent_paths.append(p)
            continue
        if p in Dataset(toppath).get_subdatasets(
                recursive=False, absolute=True):
            # this is a known subdataset that needs to be removed
            pl = content_by_ds.get(p, [])
            pl.append(p)
            content_by_ds[p] = pl
    if nonexistent_paths:
        lgr.warning("ignoring non-existent path(s): %s",
                    nonexistent_paths)

    if path_is_under(content_by_ds):
        # behave like `rm` and refuse to remove where we are
        raise ValueError(
            "refusing to uninstall current or parent directory")

    handle_dirty_datasets(
        content_by_ds, mode=if_dirty, base=dataset)

    ds2save = set()
    results = []
    # iterate over all datasets, starting at the bottom
    # to make the removal of dataset content known upstairs
    for ds_path in sorted(content_by_ds, reverse=True):
        ds = Dataset(ds_path)
        paths = content_by_ds[ds_path]
        if ds_path in paths:
            # entire dataset needs to go
            superds = ds.get_superdataset(
                datalad_only=False,
                topmost=False)
            res = _uninstall_dataset(ds, check=check, has_super=False)
            results.extend(res)
            if ds.path in ds2save:
                # we just uninstalled it, no need to save anything
                ds2save.discard(ds.path)
            if not superds:
                continue
            subds_relpath = relpath(ds_path, start=superds.path)
            # remove submodule reference
            submodule = [sm for sm in superds.repo.repo.submodules
                         if sm.path == subds_relpath]
            # there can only be one!
            assert(len(submodule) == 1)
            submodule = submodule[0]
            submodule.remove()
            if exists(ds_path):
                # could be an empty dir in case an already uninstalled
                # subdataset got removed
                os.rmdir(ds_path)
            # need to save changes to .gitmodules later
            content_by_ds[superds.path] = \
                content_by_ds.get(superds.path, []) \
                + [opj(superds.path, '.gitmodules'),
                   ds_path]
            ds2save.add(superds.path)
        else:
            if check and hasattr(ds.repo, 'drop'):
                _drop_files(ds, paths, check=True)
            results.extend(ds.repo.remove(paths, r=True))
            ds2save.add(ds.path)

    if dataset and dataset.is_installed():
        # forge chain from base dataset to any leaf dataset
        # in order to save state changes all the way up
        _discover_trace_to_known(dataset.path, [], content_by_ds)
    save_dataset_hierarchy(
        content_by_ds,
        base=dataset.path if dataset and dataset.is_installed() else None,
        message='[DATALAD] removed content')
    return results

def __call__(path=None, dataset=None, recursive=False, check=True,
             if_dirty='save-before'):
    refds_path = Interface.get_refds_path(dataset)
    res_kwargs = dict(action='uninstall', logger=lgr, refds=refds_path)
    if dataset and not path:
        # act on the whole dataset if nothing else was specified
        path = refds_path
    if not dataset and not path:
        raise InsufficientArgumentsError(
            "insufficient information for `uninstall`: requires at least a path or dataset")

    to_uninstall = []
    for ap in AnnotatePaths.__call__(
            dataset=refds_path,
            path=path,
            recursive=recursive,
            action='uninstall',
            # justification for status:
            # content need not be uninstalled where there is none
            unavailable_path_status='notneeded',
            nondataset_path_status='error',
            return_type='generator',
            on_failure='ignore'):
        if ap.get('status', None):
            # this is done
            yield ap
            continue
        # upfront sanity and compliance checks
        # check that we have no top-level datasets and no files to process
        if ap.get('type') == 'dataset' and \
                not ap.get('state', None) == 'absent' and \
                path_is_under([ap['path']]):  # wants a sequence!
            ap.update(
                status='error',
                message="refusing to uninstall current or parent directory")
            yield ap
            continue
        if not ap.get('type', None) == 'dataset':
            ap.update(
                status='impossible',
                message="can only uninstall datasets (consider the `drop` command)")
            yield ap
            continue
        # we only have dataset from here
        if not ap.get('parentds', None):
            ap.update(
                status='error',
                message="will not uninstall top-level dataset (consider `remove` command)")
            yield ap
            continue
        if not ap['path'] == refds_path:
            ap['process_content'] = True
        to_uninstall.append(ap)

    # iterate over all datasets, starting at the bottom
    # to deinit contained submodules first
    for ap in sorted(to_uninstall, key=lambda x: x['path'], reverse=True):
        if ap.get('state', None) == 'absent':
            # already gone
            continue
        ds = Dataset(ap['path'])
        # TODO generator
        # this should yield what it did
        handle_dirty_dataset(ds, mode=if_dirty)
        # we confirmed the super dataset presence above
        for r in _uninstall_dataset(ds, check=check, has_super=True,
                                    **res_kwargs):
            yield r

def __call__(
        path=None,
        dataset=None,
        recursive=False,
        check=True,
        if_dirty='save-before'):
    refds_path = Interface.get_refds_path(dataset)
    res_kwargs = dict(action='uninstall', logger=lgr, refds=refds_path)
    if dataset and not path:
        # act on the whole dataset if nothing else was specified
        path = refds_path
    if not dataset and not path:
        raise InsufficientArgumentsError(
            "insufficient information for `uninstall`: requires at least a path or dataset")

    to_uninstall = []
    for ap in AnnotatePaths.__call__(
            dataset=refds_path,
            path=path,
            recursive=recursive,
            action='uninstall',
            # justification for status:
            # content need not be uninstalled where there is none
            unavailable_path_status='notneeded',
            nondataset_path_status='error',
            return_type='generator',
            on_failure='ignore'):
        if ap.get('status', None):
            # this is done
            yield ap
            continue
        # upfront sanity and compliance checks
        # check that we have no top-level datasets and no files to process
        if ap.get('type') == 'dataset' and \
                not ap.get('state', None) == 'absent' and \
                path_is_under([ap['path']]):  # wants a sequence!
            ap.update(
                status='error',
                message="refusing to uninstall current or parent directory")
            yield ap
            continue
        if not ap.get('type', None) == 'dataset':
            ap.update(
                status='impossible',
                message="can only uninstall datasets (consider the `drop` command)")
            yield ap
            continue
        # we only have dataset from here
        if not ap.get('parentds', None):
            # this could be a side-effect of the specific call semantics.
            # As stated in #1714, we are not really interested in whether
            # a superdataset was obvious in the call, but only whether there
            # is a superdataset at all. So let's look for one, and only barf
            # when there really isn't
            parentds = Dataset(ap['path']).get_superdataset(
                datalad_only=False,
                topmost=False,
                # unless it is properly registered we have no way of
                # reinstalling it
                registered_only=True)
            if parentds is None:
                ap.update(
                    status='error',
                    message="will not uninstall top-level dataset (consider `remove` command)")
                yield ap
                continue
            ap['parentds'] = parentds.path
        if not ap['path'] == refds_path:
            ap['process_content'] = True
        to_uninstall.append(ap)

    # iterate over all datasets, starting at the bottom
    # to deinit contained submodules first
    for ap in sorted(to_uninstall, key=lambda x: x['path'], reverse=True):
        if ap.get('state', None) == 'absent':
            # already gone
            continue
        ds = Dataset(ap['path'])
        # TODO generator
        # this should yield what it did
        handle_dirty_dataset(ds, mode=if_dirty)
        # we confirmed the super dataset presence above
        for r in _uninstall_dataset(ds, check=check, has_super=True,
                                    **res_kwargs):
            yield r

def __call__(
        path=None,
        dataset=None,
        recursive=False,
        check=True,
        save=True,
        message=None,
        if_dirty='save-before'):
    res_kwargs = dict(action='remove', logger=lgr)
    if not dataset and not path:
        raise InsufficientArgumentsError(
            "insufficient information for `remove`: requires at least a path or dataset")
    refds_path = Interface.get_refds_path(dataset)
    res_kwargs['refds'] = refds_path
    if refds_path and not path and not GitRepo.is_valid_repo(refds_path):
        # nothing here, nothing to remove
        yield get_status_dict(path=refds_path, status='notneeded', **res_kwargs)
        return
    if refds_path and not path:
        # act on the whole dataset if nothing else was specified
        # TODO i think that would happen automatically in annotation?
        path = refds_path

    to_process = []

    for ap in AnnotatePaths.__call__(
            path=path,
            dataset=refds_path,
            recursive=recursive,
            # we only ever want to discover immediate subdatasets, the rest
            # will happen in `uninstall`
            recursion_limit=1,
            action='remove',
            unavailable_path_status='',
            nondataset_path_status='error',
            return_type='generator',
            on_failure='ignore'):
        if ap.get('status', None):
            # this is done
            yield ap
            continue
        if ap.get('state', None) == 'absent' and \
                ap.get('parentds', None) is None:
            # nothing exists at location, and there is no parent to
            # remove from
            ap['status'] = 'notneeded'
            ap['message'] = "path does not exist and is not in a dataset"
            yield ap
            continue
        if ap.get('raw_input', False) and ap.get('type', None) == 'dataset':
            # make sure dataset sorting yields a dedicated entry for this one
            ap['process_content'] = True
        to_process.append(ap)

    if not to_process:
        # nothing left to do, potentially all errored before
        return

    if path_is_under([ap['path'] for ap in to_process]):
        # behave like `rm` and refuse to remove where we are
        raise ValueError(
            "refusing to uninstall current or parent directory")

    # now sort into datasets so we can process them one by one
    content_by_ds, ds_props, completed, nondataset_paths = \
        annotated2content_by_ds(
            to_process,
            refds_path=refds_path)
    assert(not completed)

    # iterate over all datasets, starting at the bottom
    # to make the removal of dataset content known upstairs
    to_save = []
    # track which submodules we have removed in the process, to avoid
    # failure in case we revisit them due to a subsequent path argument
    subm_removed = []
    for ds_path in sorted(content_by_ds, reverse=True):
        ds = Dataset(ds_path)
        paths = content_by_ds[ds_path]
        to_reporemove = dict()
        # PLAN any dataset that was not raw_input, uninstall (passing recursive flag)
        # if dataset itself is in paths, skip any nondataset
        # sort reverse so we get subdatasets first
        for ap in sorted(paths, key=lambda x: x['path'], reverse=True):
            if ap.get('type', None) == 'dataset':
                # entire dataset needs to go, uninstall if present, pass recursive!
                uninstall_failed = False
                if ap['path'] == refds_path or \
                        (refds_path is None and ap.get('raw_input', False)):
                    # top-level handling, cannot use regular uninstall call, as
                    # it will refuse to uninstall a top-level dataset
                    # and rightfully so, it is really a remove in that case
                    # bypass all the safety by using low-level helper
                    for r in _uninstall_dataset(ds, check=check, has_super=False,
                                                **res_kwargs):
                        if r['status'] in ('impossible', 'error'):
                            # we need to inspect if something went wrong, in order
                            # to prevent failure from removing a non-empty dir below,
                            # but at the same time allow for continued processing
                            uninstall_failed = True
                        r['refds'] = refds_path
                        yield r
                # recheck that it wasn't removed during a previous iteration
                elif ap.get('state', None) != 'absent' and GitRepo.is_valid_repo(ap['path']):
                    # anything that is not the top-level -> regular uninstall
                    # this is for subdatasets of the to-be-removed dataset
                    # we want to simply uninstall them in a regular manner
                    for r in Uninstall.__call__(
                            # use annotated path as input, but pass a copy because
                            # we cannot rely on it being unaltered by reannotation
                            # TODO maybe adjust annotate_path to do that
                            [ap.copy()],
                            dataset=refds_path,
                            recursive=recursive,
                            check=check,
                            if_dirty=if_dirty,
                            result_xfm=None, result_filter=None, on_failure='ignore'):
                        if r['status'] in ('impossible', 'error'):
                            # we need to inspect if something went wrong, in order
                            # to prevent failure from removing a non-empty dir below,
                            # but at the same time allow for continued processing
                            uninstall_failed = True
                        yield r
                if not ap.get('raw_input', False):
                    # we only ever want to actually unregister subdatasets that
                    # were given explicitly
                    continue
                if not uninstall_failed and \
                        not ap['path'] in subm_removed and \
                        refds_path and \
                        ap.get('parentds', None) and \
                        not (relpath(ap['path'], start=refds_path).startswith(pardir) or
                             ap['path'] == refds_path) and \
                        ap.get('registered_subds', False):
                    # strip from superdataset, but only if a dataset was given explicitly
                    # as in "remove from this dataset", but not when just a path was given
                    # as in "remove from the filesystem"
                    subds_relpath = relpath(ap['path'], start=ap['parentds'])
                    # remove submodule reference
                    parentds = Dataset(ap['parentds'])
                    # play safe, will fail on dirty
                    parentds.repo.deinit_submodule(ap['path'])
                    # remove now empty submodule link
                    parentds.repo.remove(ap['path'])
                    # make a record that we removed this already, should it be
                    # revisited via another path argument, because we do not
                    # reannotate the paths after every removal
                    subm_removed.append(ap['path'])
                    yield dict(ap, status='ok', **res_kwargs)
                    # need .gitmodules update in parent
                    to_save.append(dict(
                        path=opj(parentds.path, '.gitmodules'),
                        parents=parentds.path,
                        type='file'))
                    # and the removal itself needs to be committed
                    # inform `save` that it is OK that this path
                    # doesn't exist on the filesystem anymore
                    ap['unavailable_path_status'] = ''
                    ap['process_content'] = False
                    to_save.append(ap)
                if not uninstall_failed and exists(ap['path']):
                    # could be an empty dir in case an already uninstalled
                    # subdataset got removed
                    rmdir(ap['path'])
            else:
                # anything that is not a dataset can simply be passed on
                to_reporemove[ap['path']] = ap
        # avoid unnecessary git calls when there is nothing to do
        if to_reporemove:
            if check and hasattr(ds.repo, 'drop'):
                for r in _drop_files(ds, list(to_reporemove), check=True):
                    if r['status'] == 'error':
                        # if drop errored on that path, we can't remove it
                        to_reporemove.pop(r['path'], 'avoidKeyError')
                    yield r
            if to_reporemove:
                for r in ds.repo.remove(list(to_reporemove), r=True):
                    # these were removed, but we still need to save the
                    # removal
                    r_abs = opj(ds.path, r)
                    if r_abs in to_reporemove:
                        ap = to_reporemove[r_abs]
                    else:
                        ap = {'path': r_abs,
                              'parentds': ds.path,
                              'refds': refds_path}
                    ap['unavailable_path_status'] = ''
                    to_save.append(ap)
                    yield get_status_dict(
                        status='ok',
                        path=r,
                        **res_kwargs)

    if not to_save:
        # nothing left to do, potentially all errored before
        return
    if not save:
        lgr.debug('Not calling `save` as instructed')
        return
    for res in Save.__call__(
            # TODO compose hand-selected annotated paths
            path=to_save,
            # we might have removed the reference dataset by now, recheck
            dataset=refds_path
            if (refds_path and GitRepo.is_valid_repo(refds_path))
            else None,
            message=message if message else '[DATALAD] removed content',
            return_type='generator',
            result_xfm=None, result_filter=None, on_failure='ignore'):
        yield res
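

# Minimal usage sketch, not part of the original module: both commands are also
# exposed through the high-level `datalad.api`, which is the usual entry point
# rather than calling `Uninstall.__call__`/`Remove.__call__` directly. The helper
# name and the subdataset path below are illustrative, not from the source.
def _example_remove_usage(subds_path='path/to/subds'):
    from datalad.api import remove, uninstall

    # drop the installed subdataset, keeping it registered in its superdataset
    uninstall(subds_path, check=True)

    # unregister the subdataset and commit the change in its superdataset
    remove(subds_path, message='remove obsolete subdataset')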