def assure_extracted(self):
    """Return the path to the extracted `archive`.

    Extracts the archive first if it has not been extracted yet,
    serializing concurrent extraction attempts via a file lock.
    """
    archive_path = self.path
    # Only take the lock when the "already extracted" check fails;
    # if we got the lock, we are the one responsible for extracting.
    extraction_check = (lambda obj: obj.is_extracted, (self,))
    with lock_if_check_fails(check=extraction_check,
                             lock_path=archive_path,
                             operation="extract") as (already_extracted, lock):
        if lock:
            # Holding the lock implies the check failed — nothing extracted yet
            assert not already_extracted
            self._extract_archive(archive_path)
    return archive_path
def transfer_retrieve(self, key, file):
    """Fetch `key` into `file` by extracting it from a containing archive.

    Iterates over the unique archive keys known to contain `key`; for each,
    makes sure the archive itself is present locally (fetching it via annex
    if needed, under a per-archive-key lock), extracts the wanted member via
    the extraction cache, and links it to `file`.  Returns on first success.

    Raises
    ------
    RemoteError
        If no candidate archive yields the key (after logging each failure).
    """
    akeys_tried = []
    # the same file could come from multiple files within the same archive
    # So far it doesn't make sense to "try all" of them since if one fails
    # it means the others would fail too, so it makes sense to immediately
    # prune the list so we keep only the ones from unique akeys.
    # May be whenever we support extraction directly from the tarballs
    # we should go through all and choose the one easiest to get or smth.
    for akey, afile in self._gen_akey_afiles(key, sorted=True,
                                             unique_akeys=True):
        if not akey:
            lgr.warning("Got an empty archive key %r for key %s. Skipping",
                        akey, key)
            continue
        akeys_tried.append(akey)
        try:
            # Take a lock only if the archive content is not yet present
            # locally; the lock file is keyed on the archive key so that
            # concurrent retrievals of the same archive serialize.
            with lock_if_check_fails(
                    check=(self.get_contentlocation, (akey, )),
                    lock_path=(lambda k: op.join(
                        self.repo.path, '.git',
                        'datalad-archives-%s' % k), (akey, )),
                    operation="annex-get") as (akey_fpath, lock):
                if lock:
                    # We hold the lock, so the earlier location check failed
                    assert not akey_fpath
                    self._annex_get_archive_by_key(akey)
                    akey_fpath = self.get_contentlocation(akey)
                    if not akey_fpath:
                        raise RuntimeError(
                            "We were reported to fetch it alright but now can't "
                            "get its location. Check logic")

            akey_path = op.join(self.repo.path, akey_fpath)
            assert op.exists(akey_path), \
                "Key file %s is not present" % akey_path

            # Extract that bloody file from the bloody archive
            # TODO: implement/use caching, for now a simple one
            # actually patool doesn't support extraction of a single file
            # https://github.com/wummel/patool/issues/20
            # so
            pwd = getpwd()
            lgr.debug("Getting file {afile} from {akey_path} "
                      "while PWD={pwd}".format(**locals()))
            # Remember whether extraction happens during this call so we can
            # tell the user about the newly created cache exactly once.
            was_extracted = self.cache[akey_path].is_extracted
            apath = self.cache[akey_path].get_extracted_file(afile)
            link_file_load(apath, file)
            if not was_extracted and self.cache[akey_path].is_extracted:
                self.message(
                    "%s special remote is using an extraction cache "
                    "under %s. Remove it with DataLad's 'clean' "
                    "command to save disk space."
                    % (ARCHIVES_SPECIAL_REMOTE, self.cache[akey_path].path),
                    type='info',
                )
            # Success — the key was delivered into `file`
            return
        except Exception as exc:
            # Deliberate best-effort: report this archive's failure and move
            # on to the next candidate archive key.
            ce = CapturedException(exc)
            self.message(
                "Failed to fetch {akey} containing {key}: {msg}".format(
                    akey=akey,
                    key=key,
                    # we need to get rid of any newlines, or we might
                    # break the special remote protocol
                    msg=str(ce).replace('\n', '|')))
            continue
    raise RemoteError("Failed to fetch any archive containing {key}. "
                      "Tried: {akeys_tried}".format(**locals()))