def retrieve_downloads(download_info,
                       cache_dir,
                       show_progress,
                       disable_ssl_verification=False):
    """
    Retrieve downloads into the downloads cache.

    download_info is the DownloadInfo of downloads to retrieve.
    cache_dir is the pathlib.Path to the downloads cache.
    show_progress is a boolean indicating if download progress is printed to the console.
    disable_ssl_verification is a boolean indicating if certificate verification
        should be disabled for downloads using HTTPS.

    Raises FileNotFoundError if the downloads path does not exist.
    Raises NotADirectoryError if the downloads path is not a directory.
    """
    if not cache_dir.exists():
        raise FileNotFoundError(cache_dir)
    if not cache_dir.is_dir():
        raise NotADirectoryError(cache_dir)
    for download_name, download_properties in download_info.properties_iter():
        get_logger().info('Downloading "%s" to "%s" ...', download_name,
                          download_properties.download_filename)
        download_path = cache_dir / download_properties.download_filename
        _download_if_needed(download_path, download_properties.url,
                            show_progress, disable_ssl_verification)
        if download_properties.has_hash_url():
            get_logger().info('Downloading hashes for "%s"', download_name)
            _, hash_filename, hash_url = download_properties.hashes['hash_url']
            _download_if_needed(cache_dir / hash_filename, hash_url,
                                show_progress, disable_ssl_verification)
def _retrieve_local_files(file_iter, source_dir):
    """
    Retrieves all file paths in file_iter from the local source tree

    file_iter is an iterable of strings that are relative UNIX paths to
        files in the Chromium source.

    Returns a dict of relative UNIX path strings to a list of lines in the file as strings
    """
    files = dict()
    for file_path in file_iter:
        try:
            raw_content = (source_dir / file_path).read_bytes()
        except FileNotFoundError:
            get_logger().warning('Missing file from patches: %s', file_path)
            continue
        content = None
        for encoding in TREE_ENCODINGS:
            try:
                content = raw_content.decode(encoding)
                break
            except UnicodeDecodeError:
                continue
        if content is None:
            raise UnicodeError('Unable to decode with any encoding: %s' % file_path)
        files[file_path] = content.split('\n')
    if not files:
        get_logger().error('All files used by patches are missing!')
    return files
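# Standalone sketch of the fallback-decoding loop above. TREE_ENCODINGS is
# defined elsewhere in the real module; the small stand-in tuple and the
# sample bytes below are used purely for illustration.
_FALLBACK_ENCODINGS = ('utf-8', 'iso-8859-1')

_raw = b'caf\xe9'  # Latin-1 encoded text that is not valid UTF-8
_text = None
for _encoding in _FALLBACK_ENCODINGS:
    try:
        _text = _raw.decode(_encoding)
        break
    except UnicodeDecodeError:
        continue
assert _text == 'caf\xe9'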
def check_gn_flags(gn_flags_path):
    """
    Checks if GN flags are sorted and not duplicated.

    gn_flags_path is a pathlib.Path to the GN flags file to check

    Returns True if warnings were logged; False otherwise
    """
    keys_seen = set()
    warnings = False
    with gn_flags_path.open(encoding=ENCODING) as file_obj:
        iterator = iter(file_obj.read().splitlines())
    try:
        previous = next(iterator)
    except StopIteration:
        return warnings
    for current in iterator:
        gn_key = current.split('=')[0]
        if gn_key in keys_seen:
            get_logger().warning('In GN flags %s, "%s" appears at least twice', gn_flags_path,
                                 gn_key)
            warnings = True
        else:
            keys_seen.add(gn_key)
        if current < previous:
            get_logger().warning('In GN flags %s, "%s" should be sorted before "%s"', gn_flags_path,
                                 current, previous)
            warnings = True
        previous = current
    return warnings
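# For reference, the pairwise comparison in check_gn_flags() can be exercised
# on an in-memory list; the flag values below are made up and findings are
# printed rather than logged.
_SAMPLE_GN_FLAGS = ['enable_nacl=false', 'is_debug=false', 'enable_nacl=false']

_seen_keys = set()
_previous_flag = None
for _flag in _SAMPLE_GN_FLAGS:
    _key = _flag.split('=')[0]
    if _key in _seen_keys:
        print('duplicate GN key:', _key)
    else:
        _seen_keys.add(_key)
    if _previous_flag is not None and _flag < _previous_flag:
        print('"%s" should be sorted before "%s"' % (_flag, _previous_flag))
    _previous_flag = _flag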
def merge_platform_patches(platform_patches_dir, prepend_patches_dir):
    '''
    Prepends prepend_patches_dir into platform_patches_dir

    Returns True if successful, False otherwise
    '''
    if not (platform_patches_dir / _SERIES).exists():
        get_logger().error('Unable to find platform series file: %s',
                           platform_patches_dir / _SERIES)
        return False

    # Make series.orig file
    shutil.copyfile(str(platform_patches_dir / _SERIES),
                    str(platform_patches_dir / _SERIES_ORIG))

    # Make series.prepend
    shutil.copyfile(str(prepend_patches_dir / _SERIES),
                    str(platform_patches_dir / _SERIES_PREPEND))

    # Merge patches
    merge_patches([prepend_patches_dir], platform_patches_dir, prepend=True)
    (platform_patches_dir / _SERIES).replace(platform_patches_dir /
                                             _SERIES_MERGED)

    return True
def _validate_file_index(index_file, resolved_tree, cache_index_files):
    """
    Validation of file index and hashes against the source tree.
        Updates cache_index_files

    Returns True if the file index is valid; False otherwise
    """
    all_hashes_valid = True
    crc32_regex = re.compile(r'^[a-zA-Z0-9]{8}$')
    for entry in index_file.read().decode(ENCODING).splitlines():
        try:
            relative_path, file_hash = entry.split(_INDEX_HASH_DELIMITER)
        except ValueError as exc:
            get_logger().error('Could not split entry "%s": %s', entry, exc)
            continue
        if not relative_path or not file_hash:
            get_logger().error('Entry %s of domain substitution cache file index is not valid',
                               _INDEX_HASH_DELIMITER.join((relative_path, file_hash)))
            all_hashes_valid = False
            continue
        if not crc32_regex.match(file_hash):
            get_logger().error('File index hash for %s does not appear to be a CRC32 hash',
                               relative_path)
            all_hashes_valid = False
            continue
        if zlib.crc32((resolved_tree / relative_path).read_bytes()) != int(file_hash, 16):
            get_logger().error('Hashes do not match for: %s', relative_path)
            all_hashes_valid = False
            continue
        if relative_path in cache_index_files:
            get_logger().error('File %s shows up at least twice in the file index', relative_path)
            all_hashes_valid = False
            continue
        cache_index_files.add(relative_path)
    return all_hashes_valid
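# Minimal sketch of the index format validated above: each entry pairs a
# relative path with the zlib.crc32 of the file contents rendered as eight
# hex digits (the same formatting apply_substitution() uses when writing the
# index). The path and contents are made up, and '|' stands in for
# _INDEX_HASH_DELIMITER, whose actual value is defined elsewhere.
import zlib

_sample_data = b'example file contents\n'
_sample_entry = 'chrome/example.cc|{:08x}'.format(zlib.crc32(_sample_data))
_sample_path, _sample_hash = _sample_entry.split('|')
assert zlib.crc32(_sample_data) == int(_sample_hash, 16)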
def compute_lists_proc(path, source_tree, search_regex):
    """
    Adds the path to appropriate lists to be used by compute_lists.

    path is the pathlib.Path to the file from the current working directory.
    source_tree is a pathlib.Path to the source tree
    search_regex is a compiled regex object to search for domain names
    """
    used_pep_set = set() # PRUNING_EXCLUDE_PATTERNS
    used_pip_set = set() # PRUNING_INCLUDE_PATTERNS
    used_dep_set = set() # DOMAIN_EXCLUDE_PREFIXES
    used_dip_set = set() # DOMAIN_INCLUDE_PATTERNS
    pruning_set = set()
    domain_substitution_set = set()
    symlink_set = set()
    if path.is_file():
        relative_path = path.relative_to(source_tree)
        if path.is_symlink():
            try:
                resolved_relative_posix = path.resolve().relative_to(source_tree).as_posix()
                symlink_set.add((resolved_relative_posix, relative_path.as_posix()))
            except ValueError:
                # Symlink leads out of the source tree
                pass
        else:
            try:
                if should_prune(path, relative_path, used_pep_set, used_pip_set):
                    pruning_set.add(relative_path.as_posix())
                elif should_domain_substitute(path, relative_path, search_regex, used_dep_set,
                                              used_dip_set):
                    domain_substitution_set.add(relative_path.as_posix())
            except: #pylint: disable=bare-except
                get_logger().exception('Unhandled exception while processing %s', relative_path)
    return (used_pep_set, used_pip_set, used_dep_set, used_dip_set, pruning_set,
            domain_substitution_set, symlink_set)
def _get_gitiles_commit_before_date(repo_url, target_branch, target_datetime):
    """Returns the hexadecimal hash of the closest commit before target_datetime"""
    json_log_url = '{repo}/+log/{branch}?format=JSON'.format(repo=repo_url, branch=target_branch)
    with _get_requests_session() as session:
        response = session.get(json_log_url)
        response.raise_for_status()
        git_log = json.loads(response.text[5:]) # Trim closing delimiters for various structures
    assert len(git_log) == 2 # 'log' and 'next' entries
    assert 'log' in git_log
    assert git_log['log']
    git_log = git_log['log']
    # Check boundary conditions
    if _get_gitiles_git_log_date(git_log[0]) < target_datetime:
        # Newest commit is older than target datetime
        return git_log[0]['commit']
    if _get_gitiles_git_log_date(git_log[-1]) > target_datetime:
        # Oldest commit is newer than the target datetime; assume oldest is close enough.
        get_logger().warning(
            'Oldest entry in gitiles log for repo "%s" is newer than target; '
            'continuing with oldest entry...', repo_url)
        return git_log[-1]['commit']
    # Binary search for the newest commit at or before target_datetime
    low_index = 0
    high_index = len(git_log) - 1
    while low_index != high_index:
        mid_index = low_index + (high_index - low_index) // 2
        if _get_gitiles_git_log_date(git_log[mid_index]) > target_datetime:
            low_index = mid_index + 1
        else:
            high_index = mid_index
    return git_log[low_index]['commit']
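# Self-contained sketch of the bisection above, with plain integers standing
# in for commit dates (newest first): it returns the index of the first entry
# at or below the target value.
def _first_at_or_before(descending_values, target):
    low, high = 0, len(descending_values) - 1
    while low != high:
        mid = low + (high - low) // 2
        if descending_values[mid] > target:
            low = mid + 1
        else:
            high = mid
    return low

assert _first_at_or_before([50, 40, 30, 20, 10], 35) == 2  # selects 30
assert _first_at_or_before([50, 40, 30, 20, 10], 45) == 1  # selects 40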
def unpack_downloads(download_info, cache_dir, output_dir, extractors=None):
    """
    Unpack downloads in the downloads cache to output_dir. Assumes all downloads are retrieved.

    download_info is the DownloadInfo of downloads to unpack.
    cache_dir is the pathlib.Path directory containing the download cache
    output_dir is the pathlib.Path directory to unpack the downloads to.
    extractors is a dictionary of PlatformEnum to a command or path to the
        extractor binary. Defaults to 'tar' for tar, and '_use_registry' for 7-Zip.

    May raise undetermined exceptions during archive unpacking.
    """
    for download_name, download_properties in download_info.properties_iter():
        download_path = cache_dir / download_properties.download_filename
        get_logger().info('Unpacking "%s" to %s ...', download_name,
                          download_properties.output_path)
        extractor_name = download_properties.extractor or ExtractorEnum.TAR
        if extractor_name == ExtractorEnum.SEVENZIP:
            extractor_func = extract_with_7z
        elif extractor_name == ExtractorEnum.TAR:
            extractor_func = extract_tar_file
        else:
            raise NotImplementedError(extractor_name)

        if download_properties.strip_leading_dirs is None:
            strip_leading_dirs_path = None
        else:
            strip_leading_dirs_path = Path(download_properties.strip_leading_dirs)

        extractor_func(
            archive_path=download_path,
            output_dir=output_dir / Path(download_properties.output_path),
            relative_to=strip_leading_dirs_path,
            extractors=extractors)
def _validate_deps(deps_text):
    """Returns True if the DEPS file passes validation; False otherwise"""
    try:
        _DepsNodeVisitor().visit(ast.parse(deps_text))
    except _UnexpectedSyntaxError as exc:
        get_logger().error('%s', exc)
        return False
    return True
def _chromium_hashes_generator(hashes_path):
    with hashes_path.open(encoding=ENCODING) as hashes_file:
        hash_lines = hashes_file.read().splitlines()
    for hash_name, hash_hex, _ in map(lambda x: x.lower().split('  '), hash_lines):
        if hash_name in hashlib.algorithms_available:
            yield hash_name, hash_hex
        else:
            get_logger().warning('Skipping unknown hash algorithm: %s', hash_name)
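# As implied by the unpacking above, each line of the hashes file holds an
# algorithm name, a hex digest, and a third field (e.g. the archive name),
# separated by two spaces. The sample line below is made up for illustration.
_sample_hash_line = ('sha256  '
                     '9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08  '
                     'sample.tar.xz')
_hash_name, _hash_hex, _archive_name = _sample_hash_line.lower().split('  ')
assert _hash_name == 'sha256' and len(_hash_hex) == 64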
def _retrieve_callback(args):
    retrieve_downloads(
        DownloadInfo(args.ini), args.cache, args.show_progress, args.disable_ssl_verification)
    try:
        check_downloads(DownloadInfo(args.ini), args.cache)
    except HashMismatchError as exc:
        get_logger().error('File checksum does not match: %s', exc)
        exit(1)
def _callback(args):
    if not args.src.exists():
        get_logger().error('Specified directory does not exist: %s', args.src)
        exit(1)
    if not args.version_file.exists():
        get_logger().error('Could not find the Breeze version file: %s',
                           args.version_file)
        exit(1)
    update_version(args.src, args.version_file)
def compute_lists(source_tree, search_regex):
    """
    Compute the binary pruning and domain substitution lists of the source tree.
    Returns a tuple of two items in the following order:
    1. The sorted binary pruning list
    2. The sorted domain substitution list

    source_tree is a pathlib.Path to the source tree
    search_regex is a compiled regex object to search for domain names
    """
    pruning_set = set()
    domain_substitution_set = set()
    deferred_symlinks = dict() # POSIX resolved path -> set of POSIX symlink paths
    source_tree = source_tree.resolve()
    unused_patterns = UnusedPatterns()

    for path in source_tree.rglob('*'):
        if not path.is_file():
            # NOTE: Path.rglob() does not traverse symlink dirs; no need for special handling
            continue
        relative_path = path.relative_to(source_tree)
        if path.is_symlink():
            try:
                resolved_relative_posix = path.resolve().relative_to(source_tree).as_posix()
            except ValueError:
                # Symlink leads out of the source tree
                continue
            if resolved_relative_posix in pruning_set:
                pruning_set.add(relative_path.as_posix())
            else:
                symlink_set = deferred_symlinks.get(resolved_relative_posix, None)
                if symlink_set is None:
                    symlink_set = set()
                    deferred_symlinks[resolved_relative_posix] = symlink_set
                symlink_set.add(relative_path.as_posix())
            # Path has finished processing because...
            # Pruning: either symlink has been added or removal determination has been deferred
            # Domain substitution: Only the real paths can be added, not symlinks
            continue
        try:
            if should_prune(path, relative_path, unused_patterns):
                relative_posix_path = relative_path.as_posix()
                pruning_set.add(relative_posix_path)
                symlink_set = deferred_symlinks.pop(relative_posix_path, tuple())
                if symlink_set:
                    pruning_set.update(symlink_set)
            elif should_domain_substitute(path, relative_path, search_regex, unused_patterns):
                domain_substitution_set.add(relative_path.as_posix())
        except: #pylint: disable=bare-except
            get_logger().exception('Unhandled exception while processing %s', relative_path)
            exit(1)
    return sorted(pruning_set), sorted(domain_substitution_set), unused_patterns
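# Quick illustration of the path handling used by compute_lists(): rglob()
# walks the tree, and relative_to().as_posix() yields the UNIX-style strings
# that end up in the pruning and domain substitution lists. The temporary
# tree below exists only for this demonstration.
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as _tmp:
    _tree = Path(_tmp).resolve()
    (_tree / 'third_party').mkdir()
    (_tree / 'third_party' / 'blob.bin').write_bytes(b'\x00')
    _relative = sorted(
        p.relative_to(_tree).as_posix() for p in _tree.rglob('*') if p.is_file())
    assert _relative == ['third_party/blob.bin']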
    def log_unused(self):
        """
        Logs unused patterns and prefixes

        Returns True if there are unused patterns or prefixes; False otherwise
        """
        have_unused = False
        for name in self._all_names:
            current_set = getattr(self, name, None)
            if current_set:
                get_logger().error('Unused from %s: %s', name.upper(), current_set)
                have_unused = True
        return have_unused
    def sleep_for_retry(self, response=None):
        """Sleeps for Retry-After, and logs the sleep time"""
        if response:
            retry_after = self.get_retry_after(response)
            if retry_after:
                get_logger().info(
                    'Got HTTP status %s with Retry-After header. Retrying after %s seconds...',
                    response.status, retry_after)
            else:
                get_logger().info(
                    'Could not find Retry-After header for HTTP response %s. Status reason: %s',
                    response.status, response.reason)
        return super().sleep_for_retry(response)
def _files_generator_by_args(args):
    """Returns a files_generator() instance from the CLI args"""
    # --build-outputs
    if not args.build_outputs.exists():
        get_logger().error('Could not find build outputs: %s', args.build_outputs)
        raise FileNotFoundError(args.build_outputs)

    # --cfg
    if not args.cfg.exists():
        get_logger().error('Could not find FILES.cfg at %s', args.cfg)
        raise FileNotFoundError(args.cfg)

    return filescfg_generator(args.cfg, args.build_outputs, args.cpu_arch)
def _test_patches(series_iter, patch_cache, files_under_test):
    """
    Tests the patches specified in the iterable series_iter

    Returns a boolean indicating if any of the patches have failed
    """
    for patch_path_str in series_iter:
        for patched_file in patch_cache[patch_path_str]:
            try:
                _apply_file_unidiff(patched_file, files_under_test)
            except _PatchValidationError as exc:
                get_logger().warning('Patch failed validation: %s',
                                     patch_path_str)
                get_logger().debug(
                    'Specifically, file "%s" failed validation: %s',
                    patched_file.path, exc)
                return True
            except:  #pylint: disable=bare-except
                get_logger().warning('Patch failed validation: %s',
                                     patch_path_str)
                get_logger().debug(
                    'Specifically, file "%s" caused exception while applying:',
                    patched_file.path,
                    exc_info=True)
                return True
    return False
def extract_tar_file(archive_path, output_dir, relative_to, extractors=None):
    """
    Extract regular or compressed tar archive into the output directory.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.

    relative_to is a pathlib.Path for directories that should be stripped relative to the
        root of the archive, or None if no path components should be stripped.
    extractors is a dictionary of PlatformEnum to a command or path to the
        extractor binary. Defaults to 'tar' for tar, and '_use_registry' for 7-Zip and WinRAR.

    Raises ExtractionError if unexpected issues arise during unpacking.
    """
    if extractors is None:
        extractors = DEFAULT_EXTRACTORS

    current_platform = get_running_platform()
    if current_platform == PlatformEnum.WINDOWS:
        # Try to use 7-zip first
        sevenzip_cmd = extractors.get(ExtractorEnum.SEVENZIP)
        if sevenzip_cmd == USE_REGISTRY:
            sevenzip_cmd = str(_find_7z_by_registry())
        sevenzip_bin = _find_extractor_by_cmd(sevenzip_cmd)
        if sevenzip_bin is not None:
            _extract_tar_with_7z(sevenzip_bin, archive_path, output_dir, relative_to)
            return

        # Use WinRAR if 7-zip is not found
        winrar_cmd = extractors.get(ExtractorEnum.WINRAR)
        if winrar_cmd == USE_REGISTRY:
            winrar_cmd = str(_find_winrar_by_registry())
        winrar_bin = _find_extractor_by_cmd(winrar_cmd)
        if winrar_bin is not None:
            _extract_tar_with_winrar(winrar_bin, archive_path, output_dir, relative_to)
            return
        get_logger().warning(
            'Neither 7-zip nor WinRAR were found. Falling back to Python extractor...')
    elif current_platform == PlatformEnum.UNIX:
        # NOTE: 7-zip isn't an option because it doesn't preserve file permissions
        tar_bin = _find_extractor_by_cmd(extractors.get(ExtractorEnum.TAR))
        if tar_bin is not None:
            _extract_tar_with_tar(tar_bin, archive_path, output_dir, relative_to)
            return
    else:
        # This is not a normal code path, so make it clear.
        raise NotImplementedError(current_platform)
    # Fallback to Python-based extractor on all platforms
    _extract_tar_with_python(archive_path, output_dir, relative_to)
def check_series_duplicates(patches_dir, series_path=Path('series')):
    """
    Checks if there are duplicate entries in the series file

    series_path is a pathlib.Path to the series file relative to the patches_dir

    returns True if there are duplicate entries; False otherwise.
    """
    entries_seen = set()
    for entry in _read_series_file(patches_dir, series_path):
        if entry in entries_seen:
            get_logger().warning('Patch appears more than once in series: %s', entry)
            return True
        entries_seen.add(entry)
    return False
def _download_googlesource_file(download_session, repo_url, version, relative_path):
    """
    Returns the contents of the text file with path within the given
    googlesource.com repo as a string.
    """
    if 'googlesource.com' not in repo_url:
        raise ValueError('Repository URL is not a googlesource.com URL: {}'.format(repo_url))
    full_url = repo_url + '/+/{}/{}?format=TEXT'.format(version, str(relative_path))
    get_logger().debug('Downloading: %s', full_url)
    response = download_session.get(full_url)
    if response.status_code == 404:
        raise _NotInRepoError()
    response.raise_for_status()
    # Assume all files that need patching are compatible with UTF-8
    return base64.b64decode(response.text, validate=True).decode('UTF-8')
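# The gitiles ?format=TEXT endpoint used above returns the file contents as
# base64 text; a no-network round-trip of the decode step, with made-up
# content, looks like this:
import base64

_encoded_body = base64.b64encode('// sample source line\n'.encode('UTF-8')).decode('ascii')
assert base64.b64decode(_encoded_body, validate=True).decode('UTF-8') == '// sample source line\n'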
def get_modified_files(patches_dir, series_path=Path('series')):
    """
    Yields all files modified by patches in the given patches directory
    """
    for patch_path in _read_series_file(patches_dir,
                                        series_path,
                                        join_dir=True):
        with patch_path.open(encoding=ENCODING) as file_obj:
            try:
                patch = unidiff.PatchSet(file_obj.read())
            except unidiff.errors.UnidiffParseError as exc:
                get_logger().exception('Could not parse patch: %s', patch_path)
                raise exc
            for patched_file in patch:
                if patched_file.is_removed_file or patched_file.is_modified_file:
                    yield patched_file.path
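# A small made-up diff, parsed the same way as above, showing the PatchSet
# and PatchedFile attributes this code relies on (.path and the is_*_file
# flags); no files on disk are involved.
import unidiff

_SAMPLE_DIFF = '''--- a/hello.txt
+++ b/hello.txt
@@ -1,1 +1,1 @@
-hello world
+hello there
'''
_patch_set = unidiff.PatchSet(_SAMPLE_DIFF)
assert [_patched.path for _patched in _patch_set] == ['hello.txt']
assert _patch_set[0].is_modified_file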
def _download_if_needed(file_path, url, show_progress):
    """
    Downloads a file from url to the specified path file_path if necessary.

    If show_progress is True, download progress is printed to the console.
    """
    if file_path.exists():
        get_logger().info('%s already exists. Skipping download.', file_path)
    else:
        get_logger().info('Downloading %s ...', file_path)
        reporthook = None
        if show_progress:
            reporthook = _UrlRetrieveReportHook()
        urllib.request.urlretrieve(url, str(file_path), reporthook=reporthook)
        if show_progress:
            print()
def apply_patches(patch_path_iter, tree_path, reverse=False, patch_bin_path=None):
    """
    Applies or reverses a list of patches

    tree_path is the pathlib.Path of the source tree to patch
    patch_path_iter is a list or tuple of pathlib.Path to patch files to apply
    reverse is whether the patches should be reversed
    patch_bin_path is the pathlib.Path of the patch binary, or None to find it automatically
        See find_and_check_patch() for logic to find "patch"

    Raises ValueError if the patch binary could not be found.
    """
    patch_paths = list(patch_path_iter)
    patch_bin_path = find_and_check_patch(patch_bin_path=patch_bin_path)
    if reverse:
        patch_paths.reverse()

    logger = get_logger()
    for patch_num, patch_path in enumerate(patch_paths, start=1):
        cmd = [
            str(patch_bin_path), '-p1', '--ignore-whitespace', '-i',
            str(patch_path), '-d',
            str(tree_path), '--posix',
        ]
        if reverse:
            cmd.append('--reverse')
            log_word = 'Reversing'
        else:
            cmd.append('--forward')
            log_word = 'Applying'
        logger.info('* %s %s (%s/%s)', log_word, patch_path.name, patch_num, len(patch_paths))
        logger.debug(' '.join(cmd))
        subprocess.run(cmd, check=True)
def update_version(source, version_file_path):
    is_updated = False
    old_version_file_path = source / 'chrome' / 'VERSION'
    if not filecmp.cmp(old_version_file_path, version_file_path, shallow=False):
        with open(version_file_path, 'r') as new_version_file, \
                open(old_version_file_path, 'w') as old_version_file:
            old_version_file.write(new_version_file.read())

        get_logger().info('Breeze version has been updated!')
        is_updated = True
    return is_updated
def _apply_callback(args):
    logger = get_logger()
    for patch_dir in args.patches:
        logger.info('Applying patches from %s', patch_dir)
        apply_patches(generate_patches_from_series(patch_dir, resolve=True),
                      args.target,
                      patch_bin_path=args.patch_bin)
def patch_string(patch,
                 patch_name,
                 tree_path,
                 reverse=False,
                 patch_bin_path=None):
    patch_bin_path = find_and_check_patch(patch_bin_path=patch_bin_path)
    with open(tree_path / 'tmp.patch', 'w') as tmp_patch_file:
        tmp_patch_file.write(patch)
    logger = get_logger()
    cmd = [
        str(patch_bin_path), '-p1', '--ignore-whitespace', '-i',
        str(tree_path / 'tmp.patch'), '-d',
        str(tree_path), '--no-backup-if-mismatch'
    ]
    if reverse:
        cmd.append('--reverse')
        log_word = 'Reversing'
    else:
        cmd.append('--forward')
        log_word = 'Applying'

    logger.info('* %s %s', log_word, patch_name)
    logger.debug(' '.join(cmd))
    subprocess.run(cmd, check=True)
    os.remove(tree_path / 'tmp.patch')
def _retrieve_remote_files(file_iter):
    """
    Retrieves all file paths in file_iter from Google

    file_iter is an iterable of strings that are relative UNIX paths to
        files in the Chromium source.

    Returns a dict of relative UNIX path strings to a list of lines in the file as strings
    """

    files = dict()

    root_deps_tree = _initialize_deps_tree()

    try:
        total_files = len(file_iter)
    except TypeError:
        total_files = None

    logger = get_logger()
    if total_files is None:
        logger.info('Downloading remote files...')
    else:
        logger.info('Downloading %d remote files...', total_files)
    last_progress = 0
    file_count = 0
    fallback_repo_manager = _FallbackRepoManager()
    with _get_requests_session() as download_session:
        download_session.stream = False # To ensure connection to Google can be reused
        for file_path in file_iter:
            if total_files:
                file_count += 1
                current_progress = file_count * 100 // total_files // 5 * 5
                if current_progress != last_progress:
                    last_progress = current_progress
                    logger.info('%d%% downloaded', current_progress)
            else:
                current_progress = file_count // 20 * 20
                if current_progress != last_progress:
                    last_progress = current_progress
                    logger.info('%d files downloaded', current_progress)
            try:
                files[file_path] = _download_source_file(
                    download_session, root_deps_tree, fallback_repo_manager, file_path).split('\n')
            except _NotInRepoError:
                get_logger().warning('Could not find "%s" remotely. Skipping...', file_path)
    return files
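# The progress computation above floors to 5% buckets so a message is only
# logged when the bucket changes; for example, with 240 total files:
assert 13 * 100 // 240 // 5 * 5 == 5
assert 120 * 100 // 240 // 5 * 5 == 50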
def main(args_list=None):
    """CLI entrypoint"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--pruning',
        metavar='PATH',
        type=Path,
        default='pruning.list',
        help='The path to store pruning.list. Default: %(default)s')
    parser.add_argument(
        '--domain-substitution',
        metavar='PATH',
        type=Path,
        default='domain_substitution.list',
        help='The path to store domain_substitution.list. Default: %(default)s'
    )
    parser.add_argument(
        '--domain-regex',
        metavar='PATH',
        type=Path,
        default='domain_regex.list',
        help='The path to domain_regex.list. Default: %(default)s')
    parser.add_argument('-t',
                        '--tree',
                        metavar='PATH',
                        type=Path,
                        required=True,
                        help='The path to the source tree to use.')
    parser.add_argument(
        '--processes',
        metavar='NUM',
        type=int,
        default=None,
        help=
        'The maximum number of worker processes to create. Defaults to the number of system CPUs.'
    )
    args = parser.parse_args(args_list)
    if args.tree.exists() and not _dir_empty(args.tree):
        get_logger().info('Using existing source tree at %s', args.tree)
    else:
        get_logger().error('No source tree found. Aborting.')
        exit(1)
    get_logger().info('Computing lists...')
    pruning_set, domain_substitution_set, unused_patterns = compute_lists(
        args.tree,
        DomainRegexList(args.domain_regex).search_regex, args.processes)
    with args.pruning.open('w', encoding=_ENCODING) as file_obj:
        file_obj.writelines('%s\n' % line for line in pruning_set)
    with args.domain_substitution.open('w', encoding=_ENCODING) as file_obj:
        file_obj.writelines('%s\n' % line for line in domain_substitution_set)
    if unused_patterns.log_unused():
        get_logger().error(
            'Please update or remove unused patterns and/or prefixes. '
            'The lists have still been updated with the remaining valid entries.'
        )
        exit(1)
def _load_all_patches(series_iter, patches_dir):
    """
    Returns a tuple of the following:
    - boolean indicating success or failure of reading files
    - dict of relative UNIX path strings to unidiff.PatchSet
    """
    had_failure = False
    unidiff_dict = dict()
    for relative_path in series_iter:
        if relative_path in unidiff_dict:
            continue
        unidiff_dict[relative_path] = unidiff.PatchSet.from_filename(
            str(patches_dir / relative_path), encoding=ENCODING)
        if not (patches_dir / relative_path).read_text(encoding=ENCODING).endswith('\n'):
            had_failure = True
            get_logger().warning('Patch file does not end with newline: %s',
                                 str(patches_dir / relative_path))
    return had_failure, unidiff_dict
def _process_relative_to(unpack_root, relative_to):
    """
    For an extractor that doesn't support an automatic transform, move the extracted
    contents from the relative_to/ directory to the unpack_root

    If relative_to is None, nothing is done.
    """
    if relative_to is None:
        return
    relative_root = unpack_root / relative_to
    if not relative_root.is_dir():
        get_logger().error('Could not find relative_to directory in extracted files: %s',
                           relative_to)
        raise ExtractionError()
    for src_path in relative_root.iterdir():
        dest_path = unpack_root / src_path.name
        src_path.rename(dest_path)
    relative_root.rmdir()
def _find_7z_by_registry():
    """
    Return a pathlib.Path to 7-zip's 7z.exe from the Windows Registry.

    Raises ExtractionError if it fails.
    """
    import winreg #pylint: disable=import-error
    sub_key_7zfm = 'SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\7zFM.exe'
    try:
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, sub_key_7zfm) as key_handle:
            sevenzipfm_dir = winreg.QueryValueEx(key_handle, 'Path')[0]
    except OSError:
        get_logger().exception('Unable to locate 7-zip from the Windows Registry')
        raise ExtractionError()
    sevenzip_path = Path(sevenzipfm_dir, '7z.exe')
    if not sevenzip_path.is_file():
        get_logger().error('7z.exe not found at path from registry: %s', sevenzip_path)
    return sevenzip_path
    def get_fallback(self, current_relative_path, current_node, root_deps_tree):
        """
        Helper for _download_source_file

        It returns a new (repo_url, version, new_relative_path) to attempt a file download with
        """
        assert len(current_node) == 3
        # GN special processing
        try:
            new_relative_path = current_relative_path.relative_to('tools/gn')
        except ValueError:
            pass
        else:
            if current_node is root_deps_tree[_SRC_PATH]:
                get_logger().info('Redirecting to GN repo version %s for path: %s', self.gn_version,
                                  current_relative_path)
                return (self._GN_REPO_URL, self.gn_version, new_relative_path)
        return None, None, None
def check_downloads(download_info, cache_dir):
    """
    Check integrity of the downloads cache.

    download_info is the DownloadInfo of downloads to check.
    cache_dir is the pathlib.Path to the downloads cache.

    Raises source_retrieval.HashMismatchError when the computed and expected hashes do not match.
    """
    for download_name, download_properties in download_info.properties_iter():
        get_logger().info('Verifying hashes for "%s" ...', download_name)
        download_path = cache_dir / download_properties.download_filename
        with download_path.open('rb') as file_obj:
            archive_data = file_obj.read()
        for hash_name, hash_hex in _get_hash_pairs(download_properties, cache_dir):
            get_logger().debug('Verifying %s hash...', hash_name)
            hasher = hashlib.new(hash_name, data=archive_data)
            if not hasher.hexdigest().lower() == hash_hex.lower():
                raise HashMismatchError(download_path)
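# Minimal standalone sketch of the comparison performed above: hash some
# in-memory bytes with hashlib and compare hex digests case-insensitively.
# The data and expected digest here are made up for illustration.
import hashlib

_archive_data = b'example archive contents'
_expected_hex = hashlib.new('sha256', data=_archive_data).hexdigest()
assert hashlib.new('sha256', data=_archive_data).hexdigest().lower() == _expected_hex.lower()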
def _remove_files_with_dirs(root_dir, sorted_file_iter):
    '''
    Deletes a list of sorted files relative to root_dir, removing empty directories along the way
    '''
    past_parent = None
    for partial_path in sorted_file_iter:
        complete_path = Path(root_dir, partial_path)
        try:
            complete_path.unlink()
        except FileNotFoundError:
            get_logger().warning('Could not remove prepended patch: %s', complete_path)
        if past_parent != complete_path.parent:
            while past_parent and _dir_empty(past_parent):
                past_parent.rmdir()
                past_parent = past_parent.parent
            past_parent = complete_path.parent
    # Handle the last path's directory, if any paths were processed
    if past_parent is not None:
        while _dir_empty(complete_path.parent):
            complete_path.parent.rmdir()
            complete_path = complete_path.parent
def check_patch_readability(patches_dir, series_path=Path('series')):
    """
    Check if the patches listed in the series file are readable.
        Patches that are not are logged to stdout.

    Returns True if warnings occurred, False otherwise.
    """
    warnings = False
    for patch_path in _read_series_file(patches_dir, series_path, join_dir=True):
        if patch_path.exists():
            with patch_path.open(encoding=ENCODING) as file_obj:
                try:
                    unidiff.PatchSet(file_obj.read())
                except unidiff.errors.UnidiffParseError:
                    get_logger().exception('Could not parse patch: %s', patch_path)
                    warnings = True
                    continue
        else:
            get_logger().warning('Patch not found: %s', patch_path)
            warnings = True
    return warnings
def _download_source_file(download_session, root_deps_tree, fallback_repo_manager, target_file):
    """
    Downloads the source tree file from googlesource.com

    download_session is an active requests.Session() object
    root_deps_tree is the DEPS tree from _initialize_deps_tree()
    fallback_repo_manager is a _FallbackRepoManager instance
    target_file is a relative UNIX path string to the file to download
    """
    current_node, current_relative_path = _get_target_file_deps_node(download_session,
                                                                     root_deps_tree, target_file)
    # Attempt download with potential fallback logic
    repo_url, version, _ = current_node
    try:
        # Download with DEPS-provided repo
        return _download_googlesource_file(download_session, repo_url, version,
                                           current_relative_path)
    except _NotInRepoError:
        pass
    get_logger().debug(
        'Path "%s" (relative: "%s") not found using DEPS tree; finding fallback repo...',
        target_file, current_relative_path)
    repo_url, version, current_relative_path = fallback_repo_manager.get_fallback(
        current_relative_path, current_node, root_deps_tree)
    if not repo_url:
        get_logger().error('No fallback repo found for "%s" (relative: "%s")', target_file,
                           current_relative_path)
        raise _NotInRepoError()
    try:
        # Download with fallback repo
        return _download_googlesource_file(download_session, repo_url, version,
                                           current_relative_path)
    except _NotInRepoError:
        pass
    get_logger().error('File "%s" (relative: "%s") not found in fallback repo "%s", version "%s"',
                       target_file, current_relative_path, repo_url, version)
    raise _NotInRepoError()
def apply_substitution(regex_path, files_path, source_tree, domainsub_cache):
    """
    Substitute domains in source_tree with files and substitutions,
        and save the pre-domain substitution archive to presubdom_archive.

    regex_path is a pathlib.Path to domain_regex.list
    files_path is a pathlib.Path to domain_substitution.list
    source_tree is a pathlib.Path to the source tree.
    domainsub_cache is a pathlib.Path to the domain substitution cache.

    Raises FileNotFoundError if the source tree, domain regex list, or domain
        substitution list does not exist.
    Raises FileExistsError if the domain substitution cache already exists.
    Raises ValueError if an entry in the domain substitution list contains the file index
        hash delimiter.
    """
    if not source_tree.exists():
        raise FileNotFoundError(source_tree)
    if not regex_path.exists():
        raise FileNotFoundError(regex_path)
    if not files_path.exists():
        raise FileNotFoundError(files_path)
    if domainsub_cache.exists():
        raise FileExistsError(domainsub_cache)
    resolved_tree = source_tree.resolve()
    regex_pairs = DomainRegexList(regex_path).regex_pairs
    fileindex_content = io.BytesIO()
    with tarfile.open(
            str(domainsub_cache), 'w:%s' % domainsub_cache.suffix[1:],
            compresslevel=1) as cache_tar:
        for relative_path in filter(len, files_path.read_text().splitlines()):
            if _INDEX_HASH_DELIMITER in relative_path:
                # Cache tar will be incomplete; remove it for convenience
                cache_tar.close()
                domainsub_cache.unlink()
                raise ValueError('Path "%s" contains the file index hash delimiter "%s"' %
                                 (relative_path, _INDEX_HASH_DELIMITER))
            path = resolved_tree / relative_path
            if not path.exists():
                get_logger().warning('Skipping non-existent path: %s', path)
                continue
            if path.is_symlink():
                get_logger().warning('Skipping path that has become a symlink: %s', path)
                continue
            crc32_hash, orig_content = _substitute_path(path, regex_pairs)
            if crc32_hash is None:
                get_logger().info('Path has no substitutions: %s', relative_path)
                continue
            fileindex_content.write('{}{}{:08x}\n'.format(relative_path, _INDEX_HASH_DELIMITER,
                                                          crc32_hash).encode(ENCODING))
            orig_tarinfo = tarfile.TarInfo(str(Path(_ORIG_DIR) / relative_path))
            orig_tarinfo.size = len(orig_content)
            with io.BytesIO(orig_content) as orig_file:
                cache_tar.addfile(orig_tarinfo, orig_file)
        fileindex_tarinfo = tarfile.TarInfo(_INDEX_LIST)
        fileindex_tarinfo.size = fileindex_content.tell()
        fileindex_content.seek(0)
        cache_tar.addfile(fileindex_tarinfo, fileindex_content)
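# Illustrative sketch (not part of the original module): reading the file index back
# out of a cache produced by apply_substitution, using only the standard tarfile API
# and the module constants referenced above (_INDEX_LIST, _INDEX_HASH_DELIMITER).
def _example_read_cache_index(domainsub_cache):
    """Hypothetical helper: map each substituted path to the CRC32 of its substituted content."""
    domainsub_cache = Path(domainsub_cache)
    index = dict()
    with tarfile.open(str(domainsub_cache), 'r:%s' % domainsub_cache.suffix[1:]) as cache_tar:
        with cache_tar.extractfile(_INDEX_LIST) as index_file:
            for line in index_file.read().decode(ENCODING).splitlines():
                if not line:
                    continue
                relative_path, delimiter, crc32_hash = line.partition(_INDEX_HASH_DELIMITER)
                if delimiter:
                    index[relative_path] = int(crc32_hash, 16)
    return index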
def _extract_tar_with_python(archive_path, output_dir, relative_to):
    get_logger().debug('Using pure Python tar extractor')

    class NoAppendList(list):
        """Hack to workaround memory issues with large tar files"""

        def append(self, obj):
            pass

    # Simple hack to check if symlinks are supported
    try:
        os.symlink('', '')
    except FileNotFoundError:
        # Symlinks probably supported
        symlink_supported = True
    except OSError:
        # Symlinks probably not supported
        get_logger().info('System does not support symlinks. Ignoring them.')
        symlink_supported = False
    except BaseException:
        # Unexpected exception
        get_logger().exception('Unexpected exception during symlink support check.')
        raise ExtractionError()

    with tarfile.open(str(archive_path), 'r|%s' % archive_path.suffix[1:]) as tar_file_obj:
        tar_file_obj.members = NoAppendList()
        for tarinfo in tar_file_obj:
            try:
                if relative_to is None:
                    destination = output_dir / PurePosixPath(tarinfo.name)
                else:
                    destination = output_dir / PurePosixPath(tarinfo.name).relative_to(relative_to)
                if tarinfo.issym() and not symlink_supported:
                    # In this situation, TarFile.makelink() would try to create a copy of the
                    # link target, but that fails because TarFile.members is empty (it is a
                    # NoAppendList). When symlinks are unsupported, it is safe to assume they
                    # are not needed; in practice this only happens on Windows.
                    continue
                if tarinfo.islnk():
                    # Derived from TarFile.extract()
                    new_target = output_dir / PurePosixPath(
                        tarinfo.linkname).relative_to(relative_to)
                    tarinfo._link_target = new_target.as_posix() # pylint: disable=protected-access
                if destination.is_symlink():
                    destination.unlink()
                tar_file_obj._extract_member(tarinfo, str(destination)) # pylint: disable=protected-access
            except BaseException:
                get_logger().exception('Exception thrown for tar member: %s', tarinfo.name)
                raise ExtractionError()
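# Illustrative sketch (not part of the original module): extracting an archive whose
# members live under a single top-level directory (e.g. "chromium-VERSION/") directly
# into output_dir, stripping that leading component via the relative_to argument.
def _example_extract(archive_path, output_dir, top_level_dir):
    """Hypothetical wrapper around the pure Python extractor above."""
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    _extract_tar_with_python(
        archive_path=Path(archive_path),
        output_dir=output_dir,
        relative_to=PurePosixPath(top_level_dir))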
    def _parse_data(self, path):
        """
        Parses an INI file located at path

        Raises schema.SchemaError if validation fails
        """

        def _section_generator(data):
            for section in data:
                if section == configparser.DEFAULTSECT:
                    continue
                yield section, dict(
                    filter(lambda x: x[0] not in self._ini_vars, data.items(section)))

        new_data = configparser.ConfigParser(defaults=self._ini_vars)
        with path.open(encoding=ENCODING) as ini_file:
            new_data.read_file(ini_file, source=str(path))
        try:
            self._schema.validate(dict(_section_generator(new_data)))
        except schema.SchemaError as exc:
            get_logger().error('downloads.ini failed schema validation (located in %s)', path)
            raise exc
        return new_data
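# Illustrative sketch (not part of the original module): the rough shape of an INI file
# that _parse_data reads. The exact keys are enforced by self._schema, which is not shown
# in this excerpt, so the field names below (version, url, download_filename, sha256) are
# assumptions based on the download properties used by retrieve_downloads further down.
_EXAMPLE_DOWNLOADS_INI = '''
[example-component]
version = 1.2.3
url = https://example.com/example-component-%(version)s.tar.xz
download_filename = example-component-%(version)s.tar.xz
sha256 = 0000000000000000000000000000000000000000000000000000000000000000
'''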
def retrieve_downloads(download_info, cache_dir, show_progress, disable_ssl_verification=False):
    """
    Retrieve downloads into the downloads cache.

    download_info is the DownloadInfo of downloads to retrieve.
    cache_dir is the pathlib.Path to the downloads cache.
    show_progress is a boolean indicating if download progress is printed to the console.
    disable_ssl_verification is a boolean indicating if certificate verification
        should be disabled for downloads using HTTPS.

    Raises FileNotFoundError if the downloads path does not exist.
    Raises NotADirectoryError if the downloads path is not a directory.
    """
    if not cache_dir.exists():
        raise FileNotFoundError(cache_dir)
    if not cache_dir.is_dir():
        raise NotADirectoryError(cache_dir)
    if disable_ssl_verification:
        import ssl
        # TODO: Remove this or properly implement disabling SSL certificate verification
        orig_https_context = ssl._create_default_https_context #pylint: disable=protected-access
        ssl._create_default_https_context = ssl._create_unverified_context #pylint: disable=protected-access
    try:
        for download_name, download_properties in download_info.properties_iter():
            get_logger().info('Downloading "%s" to "%s" ...', download_name,
                              download_properties.download_filename)
            download_path = cache_dir / download_properties.download_filename
            _download_if_needed(download_path, download_properties.url, show_progress)
            if download_properties.has_hash_url():
                get_logger().info('Downloading hashes for "%s"', download_name)
                _, hash_filename, hash_url = download_properties.hashes['hash_url']
                _download_if_needed(cache_dir / hash_filename, hash_url, show_progress)
    finally:
        # Try to reduce the damage of this hack by restoring the original HTTPS context ASAP
        if disable_ssl_verification:
            ssl._create_default_https_context = orig_https_context #pylint: disable=protected-access
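# Illustrative sketch (not part of the original module): a minimal caller for
# retrieve_downloads. It assumes download_info was already constructed from one or more
# downloads.ini files (parsed by _parse_data above); that constructor is not shown here.
def _example_retrieve(download_info, cache_dir='downloads_cache'):
    """Hypothetical wrapper that creates the cache directory before retrieving."""
    cache_dir = Path(cache_dir)
    cache_dir.mkdir(parents=True, exist_ok=True)
    retrieve_downloads(download_info, cache_dir, show_progress=True)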
def main(args_list=None):
    """CLI entrypoint"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--pruning',
        metavar='PATH',
        type=Path,
        default='pruning.list',
        help='The path to store pruning.list. Default: %(default)s')
    parser.add_argument(
        '--domain-substitution',
        metavar='PATH',
        type=Path,
        default='domain_substitution.list',
        help='The path to store domain_substitution.list. Default: %(default)s')
    parser.add_argument(
        '--domain-regex',
        metavar='PATH',
        type=Path,
        default='domain_regex.list',
        help='The path to domain_regex.list. Default: %(default)s')
    parser.add_argument(
        '-t',
        '--tree',
        metavar='PATH',
        type=Path,
        required=True,
        help='The path to the source tree to use.')
    args = parser.parse_args(args_list)
    if args.tree.exists() and not _dir_empty(args.tree):
        get_logger().info('Using existing source tree at %s', args.tree)
    else:
        get_logger().error('No source tree found. Aborting.')
        exit(1)
    get_logger().info('Computing lists...')
    pruning_list, domain_substitution_list, unused_patterns = compute_lists(
        args.tree,
        DomainRegexList(args.domain_regex).search_regex)
    with args.pruning.open('w', encoding=_ENCODING) as file_obj:
        file_obj.writelines('%s\n' % line for line in pruning_list)
    with args.domain_substitution.open('w', encoding=_ENCODING) as file_obj:
        file_obj.writelines('%s\n' % line for line in domain_substitution_list)
    if unused_patterns.log_unused():
        get_logger().error('Please update or remove unused patterns and/or prefixes. '
                           'The lists have still been updated with the remaining valid entries.')
        exit(1)
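# Conventional entry-point guard (an addition; the original listing ends at main) so the
# CLI above runs when the module is executed directly, e.g.
# `python <this script> --tree /path/to/source-tree`.
if __name__ == '__main__':
    main()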