Exemple #1
0
def get_local_data_path(
    path: PathLike,
    download_if_missing: bool = True,
    base_url: str = DATA_URL,
    base_path: PathLike = DATA_DIR,
) -> PathLike:
    """Return the local path of a dataset file, downloading it if needed.

    The remote location is ``path`` joined onto ``base_url`` and the local
    location is ``path`` appended to ``base_path``. The parent directory of
    the local location is passed to ``create_data_dir`` before the file is
    looked up.

    Args:
        path: location of the dataset, relative to both ``base_url`` and
            ``base_path``
        download_if_missing: download the dataset if it is not present locally
        base_url: base url of the data repository
        base_path: base path where the datasets are cached locally

    Returns:
        usable local path to the file

    Raises:
        IOError: if the file does not exist locally and
            ``download_if_missing`` is False

    """
    remote_url = urljoin(str(base_url), str(path))
    local_path = Path(base_path) / path
    create_data_dir(local_path.parent)

    # Fast path: the file is already available locally.
    if local_path.is_file():
        return local_path

    if not download_if_missing:
        raise IOError(f"Dataset {local_path} is missing.")

    download(remote_url, local_path)
    return local_path
Exemple #2
0
def repository_root(path: PathLike = None) -> Path:
    """
    get the root directory of the Git repository containing a path

    The search starts at ``path`` (or at its parent directory when ``path``
    is a file) and walks upwards until a directory with a ``.git`` entry is
    found. If no such directory exists, the filesystem root is returned.

    :param path: query path; defaults to the location of this module
    :return: absolute path of the repository root directory
    """

    if path is None:
        path = __file__
    # Anchor relative paths to the working directory first: the parent of
    # Path('.') is Path('.') itself, so a relative walk would stop at the
    # working directory without ever inspecting its ancestors.
    current = Path(path).absolute()
    if current.is_file():
        current = current.parent
    while True:
        if any(child.name == '.git' for child in current.iterdir()):
            return current
        parent = current.parent
        if parent == current:
            # Reached the filesystem root without finding a repository.
            return current
        current = parent
Exemple #3
0
def validate_paths(src: PathLike,
                   dst: Optional[PathLike] = None,
                   date_fmt: Optional[str] = None) -> tuple[Path, Path]:
    """Check a source file and derive the destination path for it.

    When ``dst`` is falsy, the directory containing ``src`` is used. When the
    destination is an existing directory, the result is placed inside it and
    named after the stem of ``src`` followed by the current time formatted
    with ``date_fmt`` (nothing is appended when ``date_fmt`` is falsy).
    Otherwise the destination is used as given, provided its parent
    directory exists.

    :param src: path of the source file
    :param dst: destination file or directory
    :param date_fmt: ``strftime`` format for the appended timestamp
    :returns: absolute source path and absolute destination path
    :raises FileNotFoundError: if ``src`` is not an existing file
    :raises NotADirectoryError: if the parent of the destination is not a
        directory
    """
    source = Path(src)
    target = Path(dst) if dst else source.parent

    if date_fmt:
        suffix = datetime.now().strftime(date_fmt)
    else:
        suffix = ''

    if not source.is_file():
        raise FileNotFoundError(f'Failed to locate specified file {source}')

    if target.is_dir():
        target = target / f'{source.stem}{suffix}'
    else:
        parent = target.parent
        if not parent.is_dir():
            raise NotADirectoryError(
                f'Failed to find destination directory {parent}')

    return source.absolute(), target.absolute()
Exemple #4
0
    def generate(cls,
                 project: Project,
                 path: PathLike,
                 do_checksum: bool = True) -> "FileReport":
        """Build the FileReport for a single file of a Project.

        The report records the file's checksum, an SPDX identifier derived
        from its project-relative path and checksum, every license identifier
        found in its SPDX expressions, and its sorted copyright lines.

        :param project: project the file belongs to
        :param path: path of the file to report on
        :param do_checksum: compute a real checksum of the file; when False a
            random 40-digit hexadecimal value is stored instead
        :returns: the populated report
        :raises OSError: if *path* is not a file
        """
        file_path = Path(path)
        if not file_path.is_file():
            raise OSError(f"{file_path} is not a file")

        rel_path = project.relative_from_root(file_path)
        report_name = "./" + str(rel_path)
        report = cls(report_name, file_path, do_checksum=do_checksum)

        # Checksum: a random value is much cheaper than hashing the file and
        # is enough when the hash only has to be unique, not reproducible.
        if not report.do_checksum:
            report.spdxfile.chk_sum = f"{random.getrandbits(160):040x}"
        else:
            report.spdxfile.chk_sum = _checksum(file_path)

        # SPDX ID: md5 over the relative path followed by the checksum.
        digest = md5(str(rel_path).encode("utf-8"))
        digest.update(report.spdxfile.chk_sum.encode("utf-8"))
        report.spdxfile.spdx_id = f"SPDXRef-{digest.hexdigest()}"

        spdx_info = project.spdx_info_of(file_path)
        found_identifiers = (
            identifier
            for expression in spdx_info.spdx_expressions
            for identifier in _LICENSING.license_keys(expression)
        )
        for identifier in found_identifiers:
            # An identifier such as Apache-1.0+ must also be accepted when
            # only the plain Apache-1.0 license is known.
            if identifier.endswith("+"):
                candidates = {identifier, identifier[:-1]}
            else:
                candidates = {identifier}

            # Bad license: no candidate appears in the license map.
            if candidates.isdisjoint(project.license_map):
                report.bad_licenses.add(identifier)
            # Missing license: no candidate is among the project's licenses.
            if candidates.isdisjoint(project.licenses):
                report.missing_licenses.add(identifier)

            report.spdxfile.licenses_in_file.append(identifier)

        # Copyright text: one sorted line per copyright notice.
        ordered_lines = sorted(spdx_info.copyright_lines)
        report.spdxfile.copyright = "\n".join(ordered_lines)

        return report
Exemple #5
0
def ensure_directory(directory: PathLike) -> Path:
    """
    make sure a directory is present, creating it when necessary

    A leading ``~`` is expanded. When the path points at an existing file,
    the directory containing that file is used instead.

    :param directory: directory path to ensure
    :returns: path to ensured directory
    """

    target = directory if isinstance(directory, Path) else Path(directory)
    target = target.expanduser()
    if target.is_file():
        # An existing file was given: fall back to its containing directory.
        target = target.parent
    if not target.exists():
        target.mkdir(parents=True, exist_ok=True)
    return target
Exemple #6
0
    def generate(cls,
                 project: Project,
                 path: PathLike,
                 do_checksum: bool = True) -> "FileReport":
        """Generate a FileReport from a path in a Project.

        :param project: project the file belongs to
        :param path: path of the file to report on
        :param do_checksum: compute a real checksum of the file; when False a
            random 40-digit hexadecimal value is stored instead
        :returns: the populated report
        :raises OSError: if *path* is not a file
        """
        path = Path(path)
        if not path.is_file():
            raise OSError(f"{path} is not a file")

        relative = project.relative_from_root(path)
        report = cls("./" + str(relative), path, do_checksum=do_checksum)

        # Checksum and ID
        if report.do_checksum:
            report.spdxfile.chk_sum = _checksum(path)
        else:
            # This path avoids a lot of heavy computation, which is handy for
            # scenarios where you only need a unique hash, not a consistent
            # hash. 160 random bits fill all 40 hexadecimal digits; fewer
            # bits would leave most of the value as zero padding and make
            # collisions between files far more likely.
            report.spdxfile.chk_sum = f"{random.getrandbits(160):040x}"
        # The SPDX ID is the md5 of the relative path followed by the checksum.
        spdx_id = md5()
        spdx_id.update(str(relative).encode("utf-8"))
        spdx_id.update(report.spdxfile.chk_sum.encode("utf-8"))
        report.spdxfile.spdx_id = f"SPDXRef-{spdx_id.hexdigest()}"

        spdx_info = project.spdx_info_of(path)
        for expression in spdx_info.spdx_expressions:
            for identifier in _LICENSING.license_keys(expression):
                # Bad license
                if identifier not in project.license_map:
                    report.bad_licenses.add(identifier)
                # Missing license
                if identifier not in project.licenses:
                    report.missing_licenses.add(identifier)

                # Add license to report.
                report.spdxfile.licenses_in_file.append(identifier)

        # Copyright text
        report.spdxfile.copyright = "\n".join(sorted(
            spdx_info.copyright_lines))

        return report
def repository_root(path: PathLike = None) -> Path:
    """
    get the root directory of the Git repository containing a path

    The search starts at ``path`` (or at its parent directory when ``path``
    is a file) and walks upwards until a directory with a ``.git`` entry is
    found. If no such directory exists, the filesystem root is returned.

    :param path: query path; defaults to the location of this module
    :return: absolute path of the repository root directory
    """

    if path is None:
        path = __file__
    # A relative path must be anchored to the working directory before the
    # upward walk: Path('.').parent is Path('.'), which would otherwise end
    # the search at the working directory instead of at the filesystem root.
    location = Path(path).absolute()
    if location.is_file():
        location = location.parent
    while True:
        entry_names = (child.name for child in location.iterdir())
        if '.git' in entry_names:
            return location
        if location == location.parent:
            # Filesystem root reached and no repository was found.
            return location
        location = location.parent
Exemple #8
0
    def generate(cls, project: Project, path: PathLike) -> FileReportInfo:
        """Build a file report plus the license problems found for the file.

        :param project: project the file belongs to
        :param path: path of the file to report on
        :returns: a FileReportInfo holding the report, the identifiers absent
            from the project's license map, and the identifiers present in
            the license map but absent from the project's licenses
        :raises OSError: if *path* is not a file
        """
        file_path = Path(path)
        if not file_path.is_file():
            raise OSError("{} is not a file".format(file_path))

        # pylint: disable=protected-access
        rel_path = project._relative_from_root(file_path)
        report = cls("./" + str(rel_path), file_path)

        # Checksum, then an SPDX ID hashed from the relative path and the
        # checksum value.
        report.spdxfile.chk_sum = _checksum(file_path)
        digest = md5(str(rel_path).encode("utf-8"))
        digest.update(report.spdxfile.chk_sum.value.encode("utf-8"))
        report.spdxfile.spdx_id = "SPDXRef-{}".format(digest.hexdigest())

        unknown = set()
        absent = set()
        spdx_info = project.spdx_info_of(file_path)
        found_identifiers = (
            identifier
            for expression in spdx_info.spdx_expressions
            for identifier in _LICENSING.license_keys(expression)
        )
        for identifier in found_identifiers:
            if identifier in project.license_map:
                # Known identifier; only "missing" when the project does not
                # list it among its licenses.
                if identifier not in project.licenses:
                    absent.add(identifier)
            else:
                # Bad license: not in the license map at all.
                unknown.add(identifier)

            report.spdxfile.add_lics(License.from_identifier(identifier))

        # Copyright text: the copyright lines, one per line.
        copyright_text = "\n".join(spdx_info.copyright_lines)
        report.spdxfile.copyright = copyright_text

        return FileReportInfo(report, unknown, absent)
Exemple #9
0
def is_file(localpath: PathLike) -> bool:
    """Tell whether *localpath* points at an existing regular file."""
    return Path(localpath).is_file()