def _download_file(self, url, cache_dir, clobber=False, offline=False, keep_uuid=False,
                   file_size=None, hash=None):
    local_path = cache_dir + os.sep + os.path.basename(url)
    if not keep_uuid:
        local_path = remove_uuid_file(local_path, dry=True)
    if Path(local_path).exists():
        # overwrites the file if the expected file size differs from the cached file size
        if file_size and Path(local_path).stat().st_size != file_size:
            clobber = True
        # overwrites the file if the expected hash differs from the cached hash
        if hash and hashfile.md5(Path(local_path)) != hash:
            clobber = True
    # if there is no cached file, download
    else:
        clobber = True
    if clobber:
        local_path = wc.http_download_file(
            url, username=self._par.HTTP_DATA_SERVER_LOGIN,
            password=self._par.HTTP_DATA_SERVER_PWD, cache_dir=str(cache_dir),
            clobber=clobber, offline=offline)
    if keep_uuid:
        return local_path
    else:
        return remove_uuid_file(local_path)
def download_raw_partial(self, url_cbin, url_ch, first_chunk=0, last_chunk=0):
    assert url_cbin.endswith('.cbin')
    assert url_ch.endswith('.ch')

    relpath = Path(url_cbin.replace(self._par.HTTP_DATA_SERVER, '.')).parents[0]
    target_dir = Path(self._get_cache_dir(None), relpath)
    Path(target_dir).mkdir(parents=True, exist_ok=True)

    # First, download the .ch file.
    ch_local_path = Path(wc.http_download_file(
        url_ch, username=self._par.HTTP_DATA_SERVER_LOGIN,
        password=self._par.HTTP_DATA_SERVER_PWD,
        cache_dir=target_dir, clobber=True, offline=False, return_md5=False))
    ch_local_path = remove_uuid_file(ch_local_path)
    ch_local_path = ch_local_path.rename(ch_local_path.with_suffix('.chopped.ch'))
    assert ch_local_path.exists()

    # Load the .ch file.
    with open(ch_local_path, 'r') as f:
        cmeta = json.load(f)

    # Get the first byte and number of bytes to download.
    i0 = cmeta['chunk_bounds'][first_chunk]
    cmeta['chunk_bounds'] = cmeta['chunk_bounds'][first_chunk:last_chunk + 2]
    cmeta['chunk_bounds'] = [_ - i0 for _ in cmeta['chunk_bounds']]
    assert len(cmeta['chunk_bounds']) >= 2
    assert cmeta['chunk_bounds'][0] == 0

    first_byte = cmeta['chunk_offsets'][first_chunk]
    cmeta['chunk_offsets'] = cmeta['chunk_offsets'][first_chunk:last_chunk + 2]
    cmeta['chunk_offsets'] = [_ - first_byte for _ in cmeta['chunk_offsets']]
    assert len(cmeta['chunk_offsets']) >= 2
    assert cmeta['chunk_offsets'][0] == 0
    n_bytes = cmeta['chunk_offsets'][-1]
    assert n_bytes > 0

    # Save the chopped chunk bounds and offsets.
    cmeta['sha1_compressed'] = None
    cmeta['sha1_uncompressed'] = None
    cmeta['chopped'] = True
    with open(ch_local_path, 'w') as f:
        json.dump(cmeta, f, indent=2, sort_keys=True)

    # Download the requested chunks.
    cbin_local_path = wc.http_download_file(
        url_cbin, username=self._par.HTTP_DATA_SERVER_LOGIN,
        password=self._par.HTTP_DATA_SERVER_PWD,
        cache_dir=target_dir, clobber=True, offline=False, return_md5=False,
        chunks=(first_byte, n_bytes))
    cbin_local_path = remove_uuid_file(cbin_local_path)
    cbin_local_path = cbin_local_path.rename(cbin_local_path.with_suffix('.chopped.cbin'))
    assert cbin_local_path.exists()

    # Decompress the chopped .cbin using the chopped .ch metadata.
    import mtscomp
    reader = mtscomp.decompress(cbin_local_path, cmeta=ch_local_path)
    return reader[:]
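# Standalone sketch (not part of the class above): illustrates how download_raw_partial
# rebases the .ch metadata when only chunks [first_chunk, last_chunk] are requested.
# The helper name `_chop_cmeta` and the toy cmeta dict are hypothetical; real .ch files
# are produced by mtscomp and contain more keys than shown here.
def _chop_cmeta(cmeta, first_chunk, last_chunk):
    i0 = cmeta['chunk_bounds'][first_chunk]
    first_byte = cmeta['chunk_offsets'][first_chunk]
    # keep last_chunk - first_chunk + 1 chunks, i.e. n + 1 boundary values
    bounds = cmeta['chunk_bounds'][first_chunk:last_chunk + 2]
    offsets = cmeta['chunk_offsets'][first_chunk:last_chunk + 2]
    # rebase so the chopped file starts at sample 0 / byte 0
    chopped = dict(cmeta,
                   chunk_bounds=[b - i0 for b in bounds],
                   chunk_offsets=[o - first_byte for o in offsets],
                   chopped=True)
    n_bytes = chopped['chunk_offsets'][-1]  # length of the byte range to request
    return chopped, first_byte, n_bytes


if __name__ == '__main__':
    cmeta = {'chunk_bounds': [0, 100, 200, 300, 400],
             'chunk_offsets': [0, 60, 130, 190, 260]}
    chopped, first_byte, n_bytes = _chop_cmeta(cmeta, first_chunk=1, last_chunk=2)
    assert chopped['chunk_bounds'] == [0, 100, 200]
    assert chopped['chunk_offsets'] == [0, 70, 130]
    assert (first_byte, n_bytes) == (60, 130)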
def _download_file(self, url, target_dir, clobber=False, offline=False, keep_uuid=False,
                   file_size=None, hash=None):
    """
    Downloads a single file from an HTTP web server.

    :param url: full URL of the remote file
    :param target_dir: local directory in which to save the file
    :param clobber: (bool: False) overwrites local dataset if any
    :param offline: (bool: False) do not attempt the download
    :param keep_uuid: (bool: False) keep the dataset UUID in the local file name
    :param file_size: expected file size in bytes, used to check the cached file
    :param hash: expected md5 hash, used to check the cached file
    :return: local path of the downloaded file
    """
    Path(target_dir).mkdir(parents=True, exist_ok=True)
    local_path = str(target_dir) + os.sep + os.path.basename(url)
    if not keep_uuid:
        local_path = alfio.remove_uuid_file(local_path, dry=True)
    if Path(local_path).exists() and not offline:
        # the local file hash doesn't match the dataset table cached hash
        hash_mismatch = hash and hashfile.md5(Path(local_path)) != hash
        file_size_mismatch = file_size and Path(local_path).stat().st_size != file_size
        if hash_mismatch or file_size_mismatch:
            clobber = True
            _logger.warning(f" local md5 or size mismatch, re-downloading {local_path}")
    # if there is no cached file, download
    else:
        clobber = True
    if clobber:
        local_path, md5 = wc.http_download_file(
            url, username=self._par.HTTP_DATA_SERVER_LOGIN,
            password=self._par.HTTP_DATA_SERVER_PWD,
            cache_dir=str(target_dir), clobber=clobber, offline=offline, return_md5=True)
        # post download, if there is a mismatch between Alyx and the newly downloaded
        # file size or hash, flag the offending file record in Alyx for database maintenance
        hash_mismatch = hash and md5 != hash
        file_size_mismatch = file_size and Path(local_path).stat().st_size != file_size
        if hash_mismatch or file_size_mismatch:
            self._tag_mismatched_file_record(url)
    if keep_uuid:
        return local_path
    else:
        return alfio.remove_uuid_file(local_path)
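# Standalone sketch of the cache-freshness check used by the variant above, expressed
# as a plain function. Assumptions: it uses hashlib instead of ibllib's hashfile.md5,
# and the name `needs_redownload` is hypothetical, chosen only for this illustration.
import hashlib
from pathlib import Path


def needs_redownload(local_path, expected_size=None, expected_md5=None):
    local_path = Path(local_path)
    if not local_path.exists():
        return True  # nothing cached yet: always download
    if expected_size is not None and local_path.stat().st_size != expected_size:
        return True  # size recorded in the dataset table differs from the cached file
    if expected_md5 is not None:
        md5 = hashlib.md5(local_path.read_bytes()).hexdigest()
        if md5 != expected_md5:
            return True  # content changed on the server, or the local copy is corrupt
    return False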
def _download_file(self, url, target_dir, clobber=False, offline=False, keep_uuid=False,
                   file_size=None, hash=None):
    """
    Downloads a single file from an HTTP web server.

    :param url: full URL of the remote file
    :param target_dir: local directory in which to save the file
    :param clobber: (bool: False) overwrites local dataset if any
    :param offline: (bool: False) do not attempt the download
    :param keep_uuid: (bool: False) keep the dataset UUID in the local file name
    :param file_size: expected file size in bytes, used to check the cached file
    :param hash: expected md5 hash, used to check the cached file
    :return: local path of the downloaded file
    """
    Path(target_dir).mkdir(parents=True, exist_ok=True)
    local_path = str(target_dir) + os.sep + os.path.basename(url)
    if not keep_uuid:
        local_path = remove_uuid_file(local_path, dry=True)
    if Path(local_path).exists():
        # overwrites the file if the expected file size differs from the cached file size
        if file_size and Path(local_path).stat().st_size != file_size:
            clobber = True
        # overwrites the file if the expected hash differs from the cached hash
        if hash and hashfile.md5(Path(local_path)) != hash:
            clobber = True
    # if there is no cached file, download
    else:
        clobber = True
    if clobber:
        local_path = wc.http_download_file(
            url, username=self._par.HTTP_DATA_SERVER_LOGIN,
            password=self._par.HTTP_DATA_SERVER_PWD, cache_dir=str(target_dir),
            clobber=clobber, offline=offline)
    if keep_uuid:
        return local_path
    else:
        return remove_uuid_file(local_path)
def test_load_uuid(self):
    dataset_types = ['eye.blink']
    eid = ('https://test.alyx.internationalbrainlab.org/'
           'sessions/' + self.eid)
    filename = one.load(eid, dataset_types=dataset_types, download_only=True, keep_uuid=True)
    uuid_fn = filename[0]
    filename = one.load(eid, dataset_types=dataset_types, download_only=True)
    self.assertTrue(filename[0] == remove_uuid_file(uuid_fn))
    self.assertFalse(Path(uuid_fn).exists())
    filename = one.load(eid, dataset_types=dataset_types, download_only=True, keep_uuid=True)
    self.assertTrue(Path(uuid_fn).exists())
def _download_file(self, url, cache_dir, clobber=False, offline=False, keep_uuid=False):
    local_path = cache_dir + os.sep + os.path.basename(url)
    if not keep_uuid:
        local_path = remove_uuid_file(local_path, dry=True)
    if clobber or not Path(local_path).exists():
        local_path = wc.http_download_file(
            url, username=self._par.HTTP_DATA_SERVER_LOGIN,
            password=self._par.HTTP_DATA_SERVER_PWD, cache_dir=str(cache_dir),
            clobber=clobber, offline=offline)
    if keep_uuid:
        return local_path
    else:
        return remove_uuid_file(local_path)