Example #1
0
def check_md5(url: str, dest_paths: List[Path]) -> bool:
    """Check whether the files listed in ``url + '.md5'`` already exist intact.

    The hash list is fetched over HTTP. Each non-blank line is split at its
    first space into a hash and a name; the name must start with ``*``
    (binary-mode marker) or a space (text-mode marker, accepted with a
    warning). If at least one directory in ``dest_paths`` holds every listed
    file with a matching hash, the files are copied from it into every
    directory that does not.

    Args:
        url: Address of the data file; ``.md5`` is appended to locate the
            hash list.
        dest_paths: Directories in which the listed files are looked up.

    Returns:
        ``True`` if some directory matched all hashes (the remaining
        directories are then filled by copying), ``False`` if the hash list
        could not be fetched, is empty or malformed, or no directory matched.
    """
    r = requests.get(url + '.md5')
    if r.status_code != 200:
        return False
    expected = {}
    for line in r.text.splitlines():
        if not line.strip():
            # Blank lines carry no hash; unpacking them used to raise ValueError.
            continue
        _md5, sep, fname = line.partition(' ')
        # A missing separator or empty name is treated like any other
        # unrecognised format instead of crashing on unpack / fname[0].
        if not sep or not fname or fname[0] not in ('*', ' '):
            log.error(f'Unknown hash content format in {url + ".md5"}')
            return False
        if fname[0] == ' ':
            log.warning(f'Hash generated in text mode for {fname}, comparison could be incorrect')
        # Drop the one-character mode marker to get the relative file name.
        expected[fname[1:]] = _md5

    if not expected:
        # all() over an empty mapping is vacuously True, which would report
        # success without having verified a single file.
        log.warning(f'No hashes found in {url + ".md5"}')
        return False

    done = None
    not_done = []
    for base_path in dest_paths:
        # file_md5 is defined outside this block; presumably it returns the
        # hex digest of the given file -- confirm against its definition.
        if all(file_md5(base_path / p) == _md5 for p, _md5 in expected.items()):
            done = base_path
        else:
            not_done.append(base_path)

    if done is None:
        return False

    for base_path in not_done:
        log.info(f'Copying data from {done} to {base_path}')
        for p in expected:
            target = base_path / p
            # Listed names may contain sub-directories that do not exist yet
            # in a destination that has never been populated.
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(done / p, target)
    return True
Example #2
0
def check_md5(url: str,
              dest_paths: List[Path],
              headers: Optional[dict] = None) -> bool:
    """Check whether the files listed in the hash list for ``url`` exist intact.

    The hash-list location is derived with ``path_set_md5(url)`` and fetched
    either from S3 (``s3://bucket/key`` via ``boto3``) or over HTTP. Each
    non-blank line is split at its first space into a hash and a name; the
    name must start with ``*`` (binary-mode marker) or a space (text-mode
    marker, accepted with a warning). If at least one directory in
    ``dest_paths`` holds every listed file with a matching hash, the files
    are copied from it into every directory that does not.

    Args:
        url: Address of the data file whose hash list should be checked.
        dest_paths: Directories in which the listed files are looked up.
        headers: Optional headers passed to ``requests.get`` for HTTP
            downloads; not used for ``s3://`` addresses.

    Returns:
        ``True`` if some directory matched all hashes (the remaining
        directories are then filled by copying), ``False`` if the hash list
        could not be fetched, is empty or malformed, or no directory matched.
    """
    url_md5 = path_set_md5(url)

    try:
        if url_md5.startswith('s3://'):
            import boto3

            s3 = boto3.resource('s3')
            # Strip the 's3://' prefix, then split into bucket and object key.
            bucket, key = url_md5[5:].split('/', maxsplit=1)
            obj = s3.Object(bucket, key)
            data = obj.get()['Body'].read().decode('utf8')
        else:
            r = requests.get(url_md5, headers=headers)
            if r.status_code != 200:
                return False
            data = r.text
    except Exception as e:
        # Best effort: any failure to fetch the hash list means "not verified".
        log.debug(
            f'Could not download {url_md5} because of an exception {type(e)}: {e}'
        )
        return False

    expected = {}
    for line in data.splitlines():
        if not line.strip():
            # Blank lines carry no hash; unpacking them used to raise ValueError.
            continue
        _md5, sep, fname = line.partition(' ')
        # A missing separator or empty name is treated like any other
        # unrecognised format instead of crashing on unpack / fname[0].
        if not sep or not fname or fname[0] not in ('*', ' '):
            # Report the address that was actually fetched, not url + ".md5".
            log.error(f'Unknown hash content format in {url_md5}')
            return False
        if fname[0] == ' ':
            log.warning(
                f'Hash generated in text mode for {fname}, comparison could be incorrect'
            )
        # Drop the one-character mode marker to get the relative file name.
        expected[fname[1:]] = _md5

    if not expected:
        # all() over an empty mapping is vacuously True, which would report
        # success without having verified a single file.
        log.warning(f'No hashes found in {url_md5}')
        return False

    done = None
    not_done = []
    for base_path in dest_paths:
        # file_md5 is defined outside this block; presumably it returns the
        # hex digest of the given file -- confirm against its definition.
        if all(
                file_md5(base_path / p) == _md5
                for p, _md5 in expected.items()):
            done = base_path
        else:
            not_done.append(base_path)

    if done is None:
        return False

    for base_path in not_done:
        log.info(f'Copying data from {done} to {base_path}')
        for p in expected:
            target = base_path / p
            # Listed names may contain sub-directories that do not exist yet
            # in a destination that has never been populated.
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(done / p, target)
    return True
Example #3
0
def compute_hashes(fpath: Union[str, Path]) -> Dict[str, str]:
    """Return MD5 hashes for the file at ``fpath``, choosing a hasher by suffix.

    ``~`` in the path is expanded first. Dispatch is case-insensitive:

    * any suffix equal to ``.tar`` -> the result of ``tar_md5``;
    * final suffix ``.gz`` -> ``gzip_md5`` result keyed by the file name
      without that suffix;
    * anything else -> ``file_md5`` result keyed by the file name.

    Args:
        fpath: Path to an existing regular file.

    Returns:
        A mapping from name to hash. For tar archives the mapping is whatever
        ``tar_md5`` returns (presumably one entry per member -- confirm).

    Raises:
        RuntimeError: If the expanded path is not a file.
    """
    target = Path(fpath).expanduser()
    if not target.is_file():
        raise RuntimeError(f'{target} is not a file')

    lowered_suffixes = [suffix.lower() for suffix in target.suffixes]
    # The tar check looks at every suffix so that names like x.tar.gz are
    # handled as tar archives before the plain-gzip branch is considered.
    if '.tar' in lowered_suffixes:
        return tar_md5(target)
    if target.suffix.lower() == '.gz':
        return {target.with_suffix('').name: gzip_md5(target)}
    return {target.name: file_md5(target)}
Example #4
0
def compute_hashes(fpath: Union[str, Path]) -> Dict[str, str]:
    """Hash a single file, delegating to an archive-aware helper when needed.

    The path is user-expanded and must point to a regular file. The helper is
    picked from the file's suffixes, compared case-insensitively:

    * ``.tar`` anywhere among the suffixes -> ``tar_md5``;
    * last suffix ``.gz`` -> ``gzip_md5``, stored under the name with that
      suffix removed;
    * last suffix ``.zip`` -> ``zip_md5``;
    * otherwise -> ``file_md5``, stored under the file's own name.

    Args:
        fpath: Location of the file to hash.

    Returns:
        A name-to-hash mapping. For tar and zip archives it is whatever the
        respective helper returns (presumably per-member hashes -- confirm).

    Raises:
        RuntimeError: If the expanded path does not refer to a file.
    """
    source = Path(fpath).expanduser()
    if not source.is_file():
        raise RuntimeError(f'{source} is not a file')

    # Checked first and against all suffixes, so e.g. x.tar.gz goes to tar_md5.
    if any(ext.lower() == '.tar' for ext in source.suffixes):
        return tar_md5(source)

    last_ext = source.suffix.lower()
    if last_ext == '.gz':
        return {source.with_suffix('').name: gzip_md5(source)}
    if last_ext == '.zip':
        return zip_md5(source)
    return {source.name: file_md5(source)}