def get_local_data_path(
    path: PathLike,
    download_if_missing: bool = True,
    base_url: str = DATA_URL,
    base_path: PathLike = DATA_DIR,
) -> PathLike:
    """Return the local file path of a dataset, downloading it if needed.

    The remote location is ``base_url`` joined with ``path`` and the local
    location is ``base_path`` joined with ``path``. When the local file is
    absent it is either fetched from the remote location or an error is
    raised, depending on ``download_if_missing``.

    Args:
        path: location of the dataset relative to both the data repository
            and the local cache directory
        download_if_missing: download the dataset if it is not present
            locally
        base_url: base url of the data repository
        base_path: base path where the datasets are cached locally

    Returns:
        usable local path to the file

    Raises:
        IOError: if the file does not exist locally and
            ``download_if_missing`` is False
    """
    # The url has to be derived before ``path`` is rebound to the local file.
    remote_url = urljoin(str(base_url), str(path))
    path = Path(base_path) / path

    # Runs on every call, also when the file is already cached
    # (presumably creates the cache directory -- see create_data_dir).
    create_data_dir(path.parent)

    if path.is_file():
        return path
    if not download_if_missing:
        raise IOError(f"Dataset {path} is missing.")

    download(remote_url, path)
    return path
def repository_root(path: Optional[PathLike] = None) -> Path:
    """
    get the root directory of the Git repository containing a path

    Walks upwards from ``path`` (or from its parent directory, if ``path`` is
    a file) and returns the first directory that has an entry named ``.git``.
    If no such directory is found, the topmost directory reached (the
    filesystem root) is returned.

    :param path: query path; defaults to the location of this module
    :return: repository root directory, as an absolute path
    """

    if path is None:
        path = __file__
    if not isinstance(path, Path):
        path = Path(path)

    # A relative path bottoms out at ``Path('.')``, which is its own parent,
    # so the walk would stop at the working directory instead of continuing
    # to the enclosing repository. Anchoring the path first avoids that.
    # Symlinks are deliberately not resolved, so absolute inputs behave
    # exactly as before.
    current = path.absolute()
    if current.is_file():
        current = current.parent

    while True:
        has_git_entry = any(child.name == '.git' for child in current.iterdir())
        # ``current == current.parent`` only holds at the filesystem root
        if has_git_entry or current == current.parent:
            return current
        current = current.parent
def validate_paths(src: PathLike, dst: Optional[PathLike] = None, date_fmt: Optional[str] = None) -> tuple[Path, Path]:
    """
    check a source file and work out the destination path that goes with it

    When ``dst`` is an existing directory (or is omitted, in which case the
    directory of ``src`` is used), the destination becomes a path inside that
    directory named after the stem of ``src`` plus an optional timestamp.
    Otherwise ``dst`` is used as given, provided its parent directory exists.

    :param src: path of an existing file
    :param dst: destination file or directory; a falsy value selects the
        directory containing ``src``
    :param date_fmt: ``strftime`` format of the timestamp appended to the
        generated name; a falsy value appends nothing
    :returns: absolute source path and absolute destination path
    :raises FileNotFoundError: if ``src`` is not an existing file
    :raises NotADirectoryError: if ``dst`` is not a directory and its parent
        directory does not exist
    """
    src = Path(src)
    if dst:
        dst = Path(dst)
    else:
        dst = src.parent

    # the timestamp is rendered before any validation takes place
    timestamp = ''
    if date_fmt:
        timestamp = datetime.now().strftime(date_fmt)

    if not src.is_file():
        raise FileNotFoundError(f'Failed to locate specified file {src}')

    if dst.is_dir():
        # the generated name uses the stem only, i.e. the suffix of src is dropped
        dst = dst.joinpath(src.stem + timestamp)
    elif not dst.parent.is_dir():
        raise NotADirectoryError(
            f'Failed to find destination directory {dst.parent}')

    return src.absolute(), dst.absolute()
def generate(cls, project: Project, path: PathLike, do_checksum: bool = True) -> "FileReport":
    """Generate a FileReport from a path in a Project.

    The report is created for *path* relative to the project root, gets a
    checksum and a derived SPDX identifier, and collects the licenses and
    copyright lines that the project reports for the file.

    Raises:
        OSError: if *path* is not an existing file.
    """
    path = Path(path)
    if not path.is_file():
        raise OSError(f"{path} is not a file")

    relative = project.relative_from_root(path)
    report = cls("./" + str(relative), path, do_checksum=do_checksum)

    # Checksum: either the real one, or a random 160-bit stand-in. The
    # stand-in skips a lot of heavy computation and is meant for scenarios
    # that only need a unique hash, not a consistent one.
    report.spdxfile.chk_sum = (
        _checksum(path)
        if report.do_checksum
        else f"{random.getrandbits(160):040x}"
    )

    # ID: md5 over the relative path followed by the checksum.
    spdx_id = md5()
    for part in (str(relative), report.spdxfile.chk_sum):
        spdx_id.update(part.encode("utf-8"))
    report.spdxfile.spdx_id = f"SPDXRef-{spdx_id.hexdigest()}"

    spdx_info = project.spdx_info_of(path)
    license_keys = (
        key
        for expression in spdx_info.spdx_expressions
        for key in _LICENSING.license_keys(expression)
    )
    for identifier in license_keys:
        # An identifier such as Apache-1.0+ also counts as known when the
        # plain Apache-1.0 is present (e.g. LICENSES/Apache-1.0.txt exists).
        if identifier.endswith("+"):
            candidates = {identifier, identifier[:-1]}
        else:
            candidates = {identifier}

        # Bad license: no candidate is in the project's license map.
        if candidates.isdisjoint(project.license_map):
            report.bad_licenses.add(identifier)
        # Missing license: no candidate is among the project's licenses.
        if candidates.isdisjoint(project.licenses):
            report.missing_licenses.add(identifier)

        # The identifier is recorded on the report in every case.
        report.spdxfile.licenses_in_file.append(identifier)

    # Copyright text, sorted so the output order is stable.
    report.spdxfile.copyright = "\n".join(sorted(spdx_info.copyright_lines))

    return report
def ensure_directory(directory: PathLike) -> Path:
    """
    make sure a directory exists, creating it (and its parents) when needed

    If the given path points at an existing file, the directory containing
    that file is used instead.

    :param directory: directory path to ensure; a leading ``~`` is expanded
    :returns: path to the ensured directory
    """

    target = directory if isinstance(directory, Path) else Path(directory)
    target = target.expanduser()

    # an existing file stands for the directory it lives in
    if target.is_file():
        target = target.parent

    if target.exists():
        return target

    target.mkdir(parents=True, exist_ok=True)
    return target
def generate(cls, project: Project, path: PathLike, do_checksum: bool = True) -> "FileReport":
    """Generate a FileReport from a path in a Project.

    The report is created for *path* relative to the project root. It gets
    a checksum (real or random, depending on the report's ``do_checksum``),
    an SPDX identifier derived from the relative path and that checksum,
    and the licenses and copyright lines the project reports for the file.

    Raises:
        OSError: if *path* is not an existing file.
    """
    path = Path(path)
    if not path.is_file():
        raise OSError(f"{path} is not a file")

    relative = project.relative_from_root(path)
    report = cls("./" + str(relative), path, do_checksum=do_checksum)

    # Checksum and ID
    if report.do_checksum:
        report.spdxfile.chk_sum = _checksum(path)
    else:
        # This path avoids a lot of heavy computation, which is handy for
        # scenarios where you only need a unique hash, not a consistent
        # hash. The placeholder is 40 hex digits wide, i.e. 160 bits, so
        # draw all 160 bits at random; drawing only 40 bits would leave 30
        # leading zeros and make collisions far more likely.
        report.spdxfile.chk_sum = f"{random.getrandbits(160):040x}"

    # The ID is the md5 of the relative path followed by the checksum.
    spdx_id = md5()
    spdx_id.update(str(relative).encode("utf-8"))
    spdx_id.update(report.spdxfile.chk_sum.encode("utf-8"))
    report.spdxfile.spdx_id = f"SPDXRef-{spdx_id.hexdigest()}"

    spdx_info = project.spdx_info_of(path)
    for expression in spdx_info.spdx_expressions:
        for identifier in _LICENSING.license_keys(expression):
            # A license expression akin to Apache-1.0+ should register
            # correctly if the plain Apache-1.0 is known, so look up both
            # spellings. Identifiers without a trailing "+" are looked up
            # exactly as before.
            identifiers = {identifier}
            if identifier.endswith("+"):
                identifiers.add(identifier[:-1])

            # Bad license
            if not any(key in project.license_map for key in identifiers):
                report.bad_licenses.add(identifier)
            # Missing license
            if not any(key in project.licenses for key in identifiers):
                report.missing_licenses.add(identifier)

            # Add license to report.
            report.spdxfile.licenses_in_file.append(identifier)

    # Copyright text, sorted so the output order is stable.
    report.spdxfile.copyright = "\n".join(sorted(spdx_info.copyright_lines))

    return report
def repository_root(path: Optional[PathLike] = None) -> Path:
    """
    get the root directory of the current Git repository

    The search starts at ``path`` (at its parent directory when ``path`` is a
    file) and moves up one directory at a time until a directory containing
    an entry named ``.git`` is found. When the filesystem root is reached
    without a match, the filesystem root itself is returned.

    :param path: query path; the location of this module is used when omitted
    :return: repository root directory (absolute)
    """

    if path is None:
        path = __file__
    if not isinstance(path, Path):
        path = Path(path)

    # Make the path absolute before walking upwards: for a relative path the
    # walk ends at ``Path('.')`` (``Path('.').parent`` is ``Path('.')``), so
    # a repository root above the working directory was never found.
    # ``absolute()`` leaves symlinks alone, so absolute inputs are handled
    # exactly as before.
    directory = path.absolute()
    if directory.is_file():
        directory = directory.parent

    while True:
        entry_names = (child.name for child in directory.iterdir())
        # a directory equal to its own parent is the filesystem root
        if '.git' in entry_names or directory == directory.parent:
            return directory
        directory = directory.parent
def generate(cls, project: Project, path: PathLike) -> FileReportInfo:
    """Generate a FileReport from a path in a Project.

    Returns a FileReportInfo bundling the report with the set of bad
    license identifiers and the set of missing license identifiers found
    for the file.

    Raises:
        OSError: if *path* is not an existing file.
    """
    path = Path(path)
    if not path.is_file():
        raise OSError("{} is not a file".format(path))

    # pylint: disable=protected-access
    relative = project._relative_from_root(path)
    report = cls("./" + str(relative), path)

    # Checksum first; the ID is the md5 of the relative path followed by the
    # checksum value.
    report.spdxfile.chk_sum = _checksum(path)
    spdx_id = md5()
    for part in (str(relative), report.spdxfile.chk_sum.value):
        spdx_id.update(part.encode("utf-8"))
    report.spdxfile.spdx_id = "SPDXRef-{}".format(spdx_id.hexdigest())

    bad_licenses = set()
    missing_licenses = set()
    spdx_info = project.spdx_info_of(path)
    for expression in spdx_info.spdx_expressions:
        for identifier in _LICENSING.license_keys(expression):
            if identifier in project.license_map:
                # In the license map: it can only be missing, and a bad
                # license is never additionally reported as missing.
                if identifier not in project.licenses:
                    missing_licenses.add(identifier)
            else:
                bad_licenses.add(identifier)

            # Every identifier is added to the report, bad or not.
            report.spdxfile.add_lics(License.from_identifier(identifier))

    # Copyright text, in the order the project yields the lines.
    report.spdxfile.copyright = "\n".join(spdx_info.copyright_lines)

    return FileReportInfo(report, bad_licenses, missing_licenses)
def is_file(localpath: PathLike) -> bool:
    """
    tell whether a local path points at an existing regular file

    :param localpath: path to check
    :returns: ``True`` if the path exists and is a file, ``False`` otherwise
    """
    return Path(localpath).is_file()