def check_md5(url: str, dest_paths: List[Path]) -> bool:
    """Verify local copies of a download against the remote ``<url>.md5`` listing.

    The listing is fetched from ``url + '.md5'``. Every non-blank line must
    look like ``<md5> *<name>`` (binary mode) or ``<md5>  <name>`` (text mode,
    accepted with a warning). Each destination directory is then checked by
    comparing ``file_md5(base_path / name)`` with the listed hash for every
    entry.

    Args:
        url: URL of the downloaded resource; ``.md5`` is appended to locate
            the hash listing.
        dest_paths: Directories that are expected to contain the listed files.

    Returns:
        ``True`` if at least one destination matches every listed hash; the
        listed files are then copied from that destination into every
        destination that did not match. ``False`` if the listing could not be
        fetched (non-200 status), is empty or malformed, or no destination
        matches.
    """
    md5_url = url + '.md5'
    r = requests.get(md5_url)
    if r.status_code != 200:
        return False

    expected = {}
    for line in r.text.splitlines():
        if not line.strip():
            # Blank lines carry no hash entry; skipping them avoids a
            # ValueError on unpacking below.
            continue
        digest, sep, fname = line.partition(' ')
        # After the first space the name must start with the mode marker:
        # '*' for binary mode or a second space for text mode.
        if not sep or not fname or fname[0] not in '* ':
            log.error(f'Unknown hash content format in {md5_url}')
            return False
        if fname[0] == ' ':
            log.warning(f'Hash generated in text mode for {fname}, comparison could be incorrect')
        expected[fname[1:]] = digest

    if not expected:
        # all() over an empty listing would be vacuously true and report
        # unverified data as verified.
        log.error(f'No hashes found in {md5_url}')
        return False

    done = None
    not_done = []
    for base_path in dest_paths:
        if all(file_md5(base_path / p) == digest for p, digest in expected.items()):
            done = base_path
        else:
            not_done.append(base_path)

    if done is None:
        return False

    for base_path in not_done:
        log.info(f'Copying data from {done} to {base_path}')
        for p in expected:
            target = base_path / p
            # shutil.copy does not create missing parent directories, and a
            # listed name may contain sub-directories.
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(done / p, target)
    return True
def check_md5(url: str, dest_paths: List[Path], headers: Optional[dict] = None) -> bool:
    """Verify local copies of a download against its remote MD5 listing.

    The listing location is ``path_set_md5(url)`` (presumably the URL of the
    companion ``.md5`` file; confirm against that helper). It is read either
    from S3 via ``boto3`` when the location starts with ``s3://``, or over
    HTTP with ``requests``. Every non-blank line must look like
    ``<md5> *<name>`` (binary mode) or ``<md5>  <name>`` (text mode, accepted
    with a warning). Each destination directory is then checked by comparing
    ``file_md5(base_path / name)`` with the listed hash for every entry.

    Args:
        url: URL of the downloaded resource.
        dest_paths: Directories that are expected to contain the listed files.
        headers: Optional HTTP headers passed to ``requests.get``; not used
            for ``s3://`` locations.

    Returns:
        ``True`` if at least one destination matches every listed hash; the
        listed files are then copied from that destination into every
        destination that did not match. ``False`` if the listing could not be
        fetched (exception or non-200 status), is empty or malformed, or no
        destination matches.
    """
    url_md5 = path_set_md5(url)

    try:
        if url_md5.startswith('s3://'):
            # Imported lazily so boto3 is only needed for S3 locations.
            import boto3

            s3 = boto3.resource('s3')
            bucket, key = url_md5[5:].split('/', maxsplit=1)
            obj = s3.Object(bucket, key)
            data = obj.get()['Body'].read().decode('utf8')
        else:
            r = requests.get(url_md5, headers=headers)
            if r.status_code != 200:
                return False
            data = r.text
    except Exception as e:
        # Deliberate best effort: any failure to fetch the listing just means
        # the data cannot be verified.
        log.debug(
            f'Could not download {url_md5} because of an exception {type(e)}: {e}'
        )
        return False

    expected = {}
    for line in data.splitlines():
        if not line.strip():
            # Blank lines carry no hash entry; skipping them avoids a
            # ValueError on unpacking below.
            continue
        digest, sep, fname = line.partition(' ')
        # After the first space the name must start with the mode marker:
        # '*' for binary mode or a second space for text mode.
        if not sep or not fname or fname[0] not in '* ':
            # Report the location that was actually fetched, not url + ".md5".
            log.error(f'Unknown hash content format in {url_md5}')
            return False
        if fname[0] == ' ':
            log.warning(
                f'Hash generated in text mode for {fname}, comparison could be incorrect'
            )
        expected[fname[1:]] = digest

    if not expected:
        # all() over an empty listing would be vacuously true and report
        # unverified data as verified.
        log.error(f'No hashes found in {url_md5}')
        return False

    done = None
    not_done = []
    for base_path in dest_paths:
        if all(file_md5(base_path / p) == digest for p, digest in expected.items()):
            done = base_path
        else:
            not_done.append(base_path)

    if done is None:
        return False

    for base_path in not_done:
        log.info(f'Copying data from {done} to {base_path}')
        for p in expected:
            target = base_path / p
            # shutil.copy does not create missing parent directories, and a
            # listed name may contain sub-directories.
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(done / p, target)
    return True
def compute_hashes(fpath: Union[str, Path]) -> Dict[str, str]:
    """Compute MD5 hashes for a file, choosing the hasher by its extension.

    Args:
        fpath: Path to an existing file; a leading ``~`` is expanded.

    Returns:
        A mapping of names to hashes:

        * any ``.tar`` suffix (case-insensitive): whatever ``tar_md5`` returns
          (presumably one entry per archive member; confirm against that
          helper);
        * final suffix ``.gz``: the file name without ``.gz`` mapped to
          ``gzip_md5`` of the file;
        * otherwise: the file name mapped to ``file_md5`` of the file.

    Raises:
        RuntimeError: If the expanded path is not an existing regular file.
    """
    target = Path(fpath).expanduser()
    if not target.is_file():
        raise RuntimeError(f'{target} is not a file')

    # '.tar' may be followed by a compression suffix (e.g. '.tar.gz'), so all
    # suffixes are inspected, not only the last one.
    lowered_suffixes = [suffix.lower() for suffix in target.suffixes]
    if '.tar' in lowered_suffixes:
        return tar_md5(target)

    if target.suffix.lower() == '.gz':
        # Key the hash by the name with the trailing '.gz' removed.
        return {target.stem: gzip_md5(target)}

    return {target.name: file_md5(target)}
def compute_hashes(fpath: Union[str, Path]) -> Dict[str, str]:
    """Compute MD5 hashes for a file, choosing the hasher by its extension.

    Args:
        fpath: Path to an existing file; a leading ``~`` is expanded.

    Returns:
        A mapping of names to hashes:

        * any ``.tar`` suffix (case-insensitive): whatever ``tar_md5`` returns
          (presumably one entry per archive member; confirm against that
          helper);
        * final suffix ``.gz``: the file name without ``.gz`` mapped to
          ``gzip_md5`` of the file;
        * final suffix ``.zip``: whatever ``zip_md5`` returns (presumably one
          entry per archive member; confirm against that helper);
        * otherwise: the file name mapped to ``file_md5`` of the file.

    Raises:
        RuntimeError: If the expanded path is not an existing regular file.
    """
    target = Path(fpath).expanduser()
    if not target.is_file():
        raise RuntimeError(f'{target} is not a file')

    # '.tar' may be followed by a compression suffix (e.g. '.tar.gz'), so all
    # suffixes are inspected, not only the last one.
    lowered_suffixes = [suffix.lower() for suffix in target.suffixes]
    if '.tar' in lowered_suffixes:
        return tar_md5(target)

    last_suffix = target.suffix.lower()
    if last_suffix == '.gz':
        # Key the hash by the name with the trailing '.gz' removed.
        return {target.stem: gzip_md5(target)}
    if last_suffix == '.zip':
        return zip_md5(target)

    return {target.name: file_md5(target)}