def verify_checksum(self, file_name: str, expected_checksum: Optional[str]) -> bool:
    """Check the SHA-256 digest of a file against an expected value.

    If no expected checksum is known, aborts via fatal() with instructions for
    registering one in thirdparty_src_checksums.txt.

    :param file_name: path to the file whose checksum is being verified
    :param expected_checksum: expected SHA-256 hex digest, or None if unknown
    :return: True if the actual digest equals the expected one
    """
    actual_checksum = compute_file_sha256(file_name)
    base_name = os.path.basename(file_name)
    if expected_checksum is None:
        # fatal() is expected not to return; the message shows the exact line to add.
        fatal(
            f"No expected checksum provided for file '{base_name}'. Consider adding the "
            f"following line to thirdparty_src_checksums.txt (or re-run with --add-checksum):\n"
            f"{actual_checksum} {base_name}\n"
        )
    return actual_checksum == expected_checksum
def get_expected_checksum_and_maybe_add_to_file(
        self, filename: str, downloaded_path: str) -> str:
    """Return the expected checksum for filename, optionally recording a new one.

    If the checksum is not yet known and --add-checksum mode is on, computes the
    SHA-256 of the downloaded file, appends it to the checksum file, caches it in
    memory, and returns it. Otherwise aborts via fatal().

    :param filename: base name of the dependency archive
    :param downloaded_path: local path of the already-downloaded file
    :return: the expected SHA-256 hex digest for filename
    """
    if filename not in self.filename2checksum:
        if self.should_add_checksum:
            # Rewrite the whole checksum file so trailing whitespace is normalized.
            with open(self.checksum_file_path, 'rt') as checksum_in:
                entries = [entry.rstrip() for entry in checksum_in.readlines()]
            checksum = compute_file_sha256(downloaded_path)
            entries.append("%s %s" % (checksum, filename))
            with open(self.checksum_file_path, 'wt') as checksum_out:
                checksum_out.write("".join(entry + "\n" for entry in entries))
            self.filename2checksum[filename] = checksum
            log("Added checksum for %s to %s: %s", filename, self.checksum_file_path, checksum)
            return checksum
        fatal("No expected checksum provided for {}".format(filename))
    return self.filename2checksum[filename]
def get_expected_checksum_and_maybe_add_to_file(
        self, file_name: str, downloaded_path: Optional[str]) -> Optional[str]:
    """Return the expected checksum for file_name, optionally recording a new one.

    If the checksum is not yet known, --add-checksum mode is on, and a downloaded
    file is available, computes its SHA-256, appends it to the checksum file,
    caches it in memory, and returns it. Returns None when the checksum is
    unknown and cannot be added.

    :param file_name: base name of the dependency archive
    :param downloaded_path: local path of the downloaded file, if any
    :return: the expected SHA-256 hex digest, or None if unavailable
    """
    if file_name in self.file_name_to_checksum:
        return self.file_name_to_checksum[file_name]
    if not (self.should_add_checksum and downloaded_path):
        return None
    # Rewrite the whole checksum file so trailing whitespace is normalized.
    with open(self.checksum_file_path, 'rt') as checksum_in:
        entries = [entry.rstrip() for entry in checksum_in.readlines()]
    checksum = compute_file_sha256(downloaded_path)
    entries.append("%s %s" % (checksum, file_name))
    with open(self.checksum_file_path, 'wt') as checksum_out:
        checksum_out.write("".join(entry + "\n" for entry in entries))
    self.file_name_to_checksum[file_name] = checksum
    log("Added checksum for %s to %s: %s", file_name, self.checksum_file_path, checksum)
    return checksum
def create_package(self) -> None:
    """Build the third-party archive tarball, write its checksum file, and
    refresh constant-name symlinks pointing at both.

    The symlinks let pull-request build tooling copy artifacts around under a
    predictable name. Both the tarball excludes and the symlink names are
    filtered out of the archive itself.
    """
    if os.path.exists(self.archive_tarball_path):
        logging.info("File already exists, deleting: %s", self.archive_tarball_path)
        os.remove(self.archive_tarball_path)

    # Create a symlink with a constant name so we can copy the file around and use it for
    # creating artifacts for pull request builds.
    archive_symlink_path = os.path.join(YB_THIRDPARTY_DIR, 'archive' + ARCHIVE_SUFFIX)
    archive_checksum_symlink_path = archive_symlink_path + CHECKSUM_SUFFIX

    symlink_base_names = [
        os.path.basename(path)
        for path in (archive_symlink_path, archive_checksum_symlink_path)
    ]
    tar_cmd = ['tar']
    # Patterns rooted at the archive directory, including the symlink names above.
    for pattern in EXCLUDE_PATTERNS_RELATIVE_TO_ARCHIVE_ROOT + symlink_base_names:
        tar_cmd += ['--exclude', '%s/%s' % (self.archive_dir_name, pattern)]
    # Patterns applied anywhere in the tree.
    for pattern in GENERAL_EXCLUDE_PATTERNS:
        tar_cmd += ['--exclude', pattern]
    tar_cmd += ['-czf', self.archive_tarball_path, self.archive_dir_name]

    log_and_run_cmd(tar_cmd, cwd=self.build_dir_parent)

    sha256 = compute_file_sha256(self.archive_tarball_path)
    with open(self.archive_checksum_path, 'w') as checksum_out:
        checksum_out.write('%s %s\n' % (sha256, self.archive_tarball_name))
    logging.info("Archive SHA256 checksum: %s, created checksum file: %s",
                 sha256, self.archive_checksum_path)

    for stale_link in (archive_symlink_path, archive_checksum_symlink_path):
        remove_path(stale_link)
    create_symlink_and_log(self.archive_tarball_path, archive_symlink_path)
    create_symlink_and_log(self.archive_checksum_path, archive_checksum_symlink_path)
def create_package(self) -> None:
    """Build the third-party archive tarball and write its SHA-256 checksum file.

    Any pre-existing tarball at the target path is deleted first. Exclude
    patterns are applied both relative to the archive root and globally.
    """
    if os.path.exists(self.archive_tarball_path):
        logging.info("File already exists, deleting: %s", self.archive_tarball_path)
        os.remove(self.archive_tarball_path)

    tar_cmd = ['tar']
    # Patterns rooted at the archive directory.
    for pattern in EXCLUDE_PATTERNS_RELATIVE_TO_ARCHIVE_ROOT:
        tar_cmd += ['--exclude', '%s/%s' % (self.archive_dir_name, pattern)]
    # Patterns applied anywhere in the tree.
    for pattern in GENERAL_EXCLUDE_PATTERNS:
        tar_cmd += ['--exclude', pattern]
    tar_cmd += ['-czf', self.archive_tarball_path, self.archive_dir_name]

    log_and_run_cmd(tar_cmd, cwd=self.build_dir_parent)

    sha256 = compute_file_sha256(self.archive_tarball_path)
    with open(self.archive_sha256_path, 'w') as checksum_out:
        checksum_out.write('%s %s\n' % (sha256, self.archive_tarball_name))
    logging.info("Archive SHA256 checksum: %s, created checksum file: %s",
                 sha256, self.archive_sha256_path)
def ensure_file_downloaded(
        self,
        url: str,
        file_path: str,
        enable_using_alternative_url: bool,
        expected_checksum: Optional[str] = None,
        verify_checksum: bool = True) -> None:
    """Download url to file_path unless a correctly-checksummed copy already exists.

    Retries each candidate URL up to MAX_FETCH_ATTEMPTS times with an increasing
    sleep between attempts, optionally falling back to an alternative mirror URL.
    The downloaded file's SHA-256 is verified (and possibly recorded) afterwards.

    :param url: primary download URL
    :param file_path: destination path for the downloaded file
    :param enable_using_alternative_url: whether to fall back to the mirror URL
    :param expected_checksum: known SHA-256 digest, or None to look it up
    :param verify_checksum: whether to verify the checksum before and after download
    :raises subprocess.CalledProcessError: if all download attempts fail
    """
    log(f"Ensuring {url} is downloaded to path {file_path}")
    file_name = os.path.basename(file_path)
    mkdir_if_missing(self.download_dir)

    if os.path.exists(file_path) and verify_checksum:
        # We check the filename against our checksum map only if the file exists. This is done
        # so that we would still download the file even if we don't know the checksum, making it
        # easier to add new third-party dependencies.
        if expected_checksum is None:
            expected_checksum = self.get_expected_checksum_and_maybe_add_to_file(
                file_name, downloaded_path=file_path)
        if self.verify_checksum(file_path, expected_checksum):
            log("No need to re-download %s: checksum already correct", file_name)
            return
        log("File %s already exists but has wrong checksum, removing", file_path)
        remove_path(file_path)

    log("Fetching %s from %s", file_name, url)
    download_successful = False
    alternative_url = ALTERNATIVE_URL_PREFIX + file_name
    total_attempts = 0

    url_candidates = [url]
    if enable_using_alternative_url:
        url_candidates += [alternative_url]

    for effective_url in url_candidates:
        if effective_url == alternative_url:
            log("Switching to alternative download URL %s after %d attempts",
                alternative_url, total_attempts)
        sleep_time_sec = INITIAL_DOWNLOAD_RETRY_SLEEP_TIME_SEC
        for attempt_index in range(1, MAX_FETCH_ATTEMPTS + 1):
            try:
                total_attempts += 1
                curl_cmd_line = [
                    self.curl_path,
                    '-o', file_path,
                    '-L',  # follow redirects (was duplicated as '--location'; one flag suffices)
                    '--silent',
                    '--show-error',
                    effective_url]
                log("Running command: %s", shlex_join(curl_cmd_line))
                subprocess.check_call(curl_cmd_line)
                download_successful = True
                break
            except subprocess.CalledProcessError as ex:
                # Fix: log the URL being fetched, not the path to the curl binary.
                log("Error downloading %s (attempt %d for this URL, total attempts %d): %s",
                    effective_url, attempt_index, total_attempts, str(ex))
                # Fix: give up after exhausting the LAST candidate URL. The original compared
                # against alternative_url, so when the alternative URL was disabled the retries
                # for the primary URL were silently exhausted without raising.
                if attempt_index == MAX_FETCH_ATTEMPTS and effective_url == url_candidates[-1]:
                    log("Giving up after %d attempts", MAX_FETCH_ATTEMPTS)
                    raise ex
                log("Will retry after %.1f seconds", sleep_time_sec)
                time.sleep(sleep_time_sec)
                sleep_time_sec += DOWNLOAD_RETRY_SLEEP_INCREASE_SEC
        if download_successful:
            break

    if not os.path.exists(file_path):
        # Fix: removed duplicated word "but but" in the error message.
        fatal("Downloaded '%s' but unable to find '%s'", url, file_path)

    if verify_checksum:
        if expected_checksum is None:
            expected_checksum = self.get_expected_checksum_and_maybe_add_to_file(
                file_name, downloaded_path=file_path)
        if not self.verify_checksum(file_path, expected_checksum):
            fatal("File '%s' has wrong checksum after downloading from '%s'. "
                  "Has %s, but expected: %s",
                  file_path, url, compute_file_sha256(file_path), expected_checksum)
def verify_checksum(self, file_name: str, expected_checksum: str) -> bool:
    """Return True iff the SHA-256 digest of the file matches expected_checksum.

    :param file_name: path to the file to hash
    :param expected_checksum: expected SHA-256 hex digest
    """
    return compute_file_sha256(file_name) == expected_checksum