def _key(
    git_url: str, commit_hash: str, specified_analyzer: SpecifiedAnalyzer
) -> str:
    """Build the flattened storage key for a container log.

    The key embeds the analyzer's name and version, a repo identifier
    derived from the git URL, and the commit hash. Path separators are
    then rewritten to "___" so the whole key is a single flat segment.
    """
    versioned = specified_analyzer.versioned_analyzer
    repo_id = url_to_repo_id(git_url)
    nested = (
        f"{versioned.name}/{versioned.version}/"
        f"{repo_id}/{commit_hash}/container.log"
    )
    # Flatten *after* assembling, so any "/" inside a component is
    # rewritten as well.
    return nested.replace("/", "___")
def container_log_key(git_url: str, commit_hash: str, image_id: str) -> str:
    """Return the key under which a docker container's log is stored."""
    versioned = VersionedAnalyzer.from_image_id(image_id)
    segments = [
        str(versioned.name),
        str(versioned.version),
        url_to_repo_id(git_url),
        commit_hash,
        "container.log",
    ]
    return "/".join(segments)
def analysis_key(
    self,
    git_url: str,
    commit_hash: str,
    specified_analyzer: SpecifiedAnalyzer,
    output_type: Optional[AnalyzerOutputType] = None,
) -> str:
    """Key the analysis report was uploaded with.

    Args:
        git_url: URL of the repo analyzed
        commit_hash: hash analyzed
        specified_analyzer: unique identifier of the analysis container
            with its parameters
        output_type: type of output to retrieve for SPECIFIED_ANALYZER;
            when None, the analyzer's output type is looked up in the
            registry

    Returns:
        Key of the report in the S3 bucket.

    Raises:
        UnsupportedAnalyzerType: if the analyzer is neither a git nor a
            commit analyzer.
    """
    manifest = self._registry_data.manifest_for(
        specified_analyzer.versioned_analyzer
    )
    repo_id = url_to_repo_id(git_url)

    if output_type is None:
        output_type = manifest.output_type
    extension = self._get_analyzer_output_extension(output_type)

    analyzer_part = f"{manifest.analyzer_name}/{manifest.version}"
    params = specified_analyzer.parameters
    if params:
        # NOTE(review): name/value pairs are concatenated with no
        # separator between pairs; distinct parameter sets could in
        # principle collide. Left as-is because changing it would break
        # lookups of already-stored keys.
        param_part = "".join(f"{name}:{params[name]}" for name in sorted(params))
        analyzer_part = f"{analyzer_part}/{param_part}"

    analyzer_type = manifest.analyzer_type
    if analyzer_type not in (AnalyzerType.git, AnalyzerType.commit):
        raise UnsupportedAnalyzerType(analyzer_type)

    # Git analyzers also include the commit hash so the rest of the
    # pipeline can treat them the same way as commit analyzers.
    # TODO: design for this properly — fingerprint the repo while
    # keeping the determinism guarantee.
    target_part = f"{repo_id}/{commit_hash}/output{extension}"

    return f"{analyzer_part}/{target_part}"
def get_local_git_origin_and_commit(dir: str) -> Tuple[str, str]:
    """Return ``(origin_url, commit_hash)`` for the git repo at *dir*.

    When *dir* is not a git checkout (or git is unavailable), falls back
    to a deterministic hash of the directory path, returned for both
    values, so callers can still derive a stable identifier.

    Args:
        dir: path to the (presumed) git working directory. NOTE: the
            parameter name shadows the ``dir`` builtin but is kept for
            backward compatibility with keyword callers.

    Returns:
        Tuple of (remote origin URL, commit hash), or
        (hash_of_dir, hash_of_dir) on failure.
    """

    def _git_output(*args: str) -> str:
        # Single place for the subprocess call: run git in *dir* and
        # return its stdout, stripped and decoded.
        raw = subprocess.check_output(["git", *args], cwd=dir)
        return raw.strip().decode("utf-8")

    try:
        repo = _git_output("config", "--get", "remote.origin.url")
        # --format=%H (no quotes) yields the bare hash directly; the old
        # --format="%H" form printed literal quotes that then had to be
        # stripped back out.
        commit = _git_output("show", "--format=%H", "--no-patch")
        return repo, commit
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError covers a missing git binary; either way we
        # fall back rather than propagate. logger.exception records the
        # traceback the old logger.error call silently dropped.
        logger.exception(
            f"failed to determine source git repo or commit for {dir}")
        # use same util function, but treat local relative dir path as repo
        hash_of_dir = url_to_repo_id(dir)
        logger.debug(f"Using {hash_of_dir}, {hash_of_dir} as repo, commit")
        return hash_of_dir, hash_of_dir
def _key(
    git_url: str, commit_hash: str, specified_analyzer: SpecifiedAnalyzer
) -> str:
    """Build the flattened key for an analyzer run's output archive.

    Combines analyzer name/version (plus any sorted parameters), a repo
    identifier derived from the git URL, and the commit hash, then
    rewrites every "/" to "___" to produce one flat key segment.
    """
    versioned = specified_analyzer.versioned_analyzer
    params = specified_analyzer.parameters
    repo_id = url_to_repo_id(git_url)

    segments = [str(versioned.name), str(versioned.version)]
    if params:
        # Parameters are serialized in sorted order for determinism.
        segments.append("".join(f"{k}:{params[k]}" for k in sorted(params)))
    segments += [repo_id, commit_hash, "output.tar.gz"]

    # Flatten after joining so "/" inside any component is rewritten too,
    # matching the historical key format.
    return "/".join(segments).replace("/", "___")