Ejemplo n.º 1
0
def update_request_env_vars(request_id: int,
                            env_vars: Dict[str, Dict[str, str]]) -> None:
    """Record environment variables on a Cachito request through the Cachito API.

    :param int request_id: the ID of the request whose environment variables are updated
    :param dict env_vars: the environment variables to record, keyed by variable name. Each
        value is itself a mapping with the "value" and "kind" attributes, e.g.
        {"NAME": {"value": "VALUE", "kind": "KIND"}}.
    :raise CachitoError: if the connection fails or the API responds with an error status
    """
    worker_config = get_worker_config()
    url = _get_request_url(request_id)
    try:
        response = requests_auth_session.patch(
            url,
            json={"environment_variables": env_vars},
            timeout=worker_config.cachito_api_timeout,
        )
    except requests.RequestException:
        error = (
            f"The connection failed when updating environment variables on the request {request_id}"
        )
        log.exception(error)
        raise CachitoError(error)

    # Nothing more to do when the API accepted the update
    if response.ok:
        return

    log.error(
        "The worker failed to update environment variables on the request %d. "
        "The status was %d. The text was:\n%s",
        request_id,
        response.status_code,
        response.text,
    )
    raise CachitoError(f"Updating environment variables on request {request_id} failed")
Ejemplo n.º 2
0
def verify_checksum(file_path: str,
                    checksum_info: ChecksumInfo,
                    chunk_size: int = 10240):
    """
    Check that the file at the given path hashes to the expected checksum.

    :param str file_path: the path to the file to be verified
    :param ChecksumInfo checksum_info: the expected checksum information; its ``algorithm``
        and ``hexdigest`` attributes are used
    :param int chunk_size: the amount of bytes to read at a time
    :raise CachitoError: if the hash algorithm is unknown or the checksum is not as expected
    """
    # Only the base name is reported in error messages, not the full path
    name = os.path.basename(file_path)

    try:
        file_hash = hash_file(file_path, chunk_size, checksum_info.algorithm)
    except UnknownHashAlgorithm as exc:
        raise CachitoError(f"Cannot perform checksum on the file {name}, {exc}")

    actual = file_hash.hexdigest()
    expected = checksum_info.hexdigest
    if actual != expected:
        raise CachitoError(
            f"The file {name} has an unexpected checksum value, "
            f"expected {expected} but computed {actual}"
        )
Ejemplo n.º 3
0
    def _verify_archive(self):
        """
        Verify the archive containing the git repository.

        The archive must exist and be recognized as a tar file. It is then extracted into a
        temporary directory and ``git fsck`` is run in the extracted ``app`` directory.

        :raises CachitoError: if the archive is missing or is not a tar file, if it cannot be
            extracted, or if 'git fsck' fails for the extracted sources
        """
        log.debug("Verifying the archive at %s", self.sources_dir.archive_path)
        if not os.path.exists(self.sources_dir.archive_path) or not tarfile.is_tarfile(
            self.sources_dir.archive_path
        ):
            err_msg = f"No valid archive found at {self.sources_dir.archive_path}"
            # No exception is being handled here, so log.error is used; log.exception would
            # append a meaningless "NoneType: None" traceback to the log record
            log.error(err_msg)
            raise CachitoError(err_msg)

        # The log message carries the details; the raised exception only names the archive
        err_msg = {
            "log": "Cachito found an error when verifying the generated archive at %s. %s",
            "exception": f"Invalid archive at {self.sources_dir.archive_path!s}",
        }
        with tempfile.TemporaryDirectory(prefix="cachito-") as temp_dir:
            cmd = ["git", "fsck"]
            # The repository is expected under the "app" directory inside the archive
            repo_path = os.path.join(temp_dir, "app")
            try:
                with tarfile.open(self.sources_dir.archive_path, mode="r:gz") as tar:
                    tar.extractall(temp_dir)
            except (tarfile.ExtractError, zlib.error, OSError) as exc:
                log.error(err_msg["log"], self.sources_dir.archive_path, exc)
                raise CachitoError(err_msg["exception"])

            try:
                run_cmd(cmd, {"cwd": repo_path, "check": True})
            except subprocess.CalledProcessError as exc:
                # Extend the shared log format with the stderr output of the failed command
                msg = f"{err_msg['log']}. STDERR: %s"
                log.error(msg, self.sources_dir.archive_path, exc, exc.stderr)
                raise CachitoError(err_msg["exception"])
Ejemplo n.º 4
0
def _patch_request_or_fail(request_id: int, payload: dict,
                           connect_error_msg: str,
                           status_error_msg: str) -> None:
    """
    Send a PATCH for the given request to the Cachito API, failing on any error.

    Either error message may include the {exc} placeholder; it is filled in with the exception
    that was caught.

    :param request_id: ID of the request to update
    :param payload: the JSON data to send to the PATCH endpoint
    :param connect_error_msg: error message to raise if the connection fails
    :param status_error_msg: error message to raise if the response status is 4xx or 5xx
    :raises CachitoError: if the connection fails or the API returns an error response
    """
    worker_config = get_worker_config()
    api_root = worker_config.cachito_api_url.rstrip("/")
    url = f"{api_root}/requests/{request_id}"

    try:
        response = requests_auth_session.patch(
            url, json=payload, timeout=worker_config.cachito_api_timeout
        )
        response.raise_for_status()
    except requests.RequestException as error:
        # HTTPError is a RequestException subclass, so a single handler covers both cases and
        # the error status case is told apart by type
        if isinstance(error, requests.HTTPError):
            template = status_error_msg
        else:
            template = connect_error_msg
        failure = template.format(exc=error)
        log.exception(failure)
        raise CachitoError(failure)
Ejemplo n.º 5
0
    def clone_and_archive(self):
        """
        Clone the git repository and create the compressed source archive.

        The repository at ``self.url`` is cloned without a checkout into a temporary directory,
        ``self.ref`` is checked out, and the clone is added to a gzip-compressed tar archive at
        ``self.archive_path`` under the ``app`` directory.

        :raises CachitoError: if cloning the repository or checking out the reference fails
        """
        with tempfile.TemporaryDirectory(prefix='cachito-') as temp_dir:
            clone_path = os.path.join(temp_dir, 'repo')
            log.debug('Cloning the Git repository from %s', self.url)
            # Don't allow git to prompt for a username if we don't have access. Note that this
            # changes the environment of the whole process, not only of this clone.
            os.environ['GIT_TERMINAL_PROMPT'] = '0'
            try:
                repo = git.repo.Repo.clone_from(self.url,
                                                clone_path,
                                                no_checkout=True)
            # Exception (instead of a bare except) lets KeyboardInterrupt and SystemExit
            # propagate rather than being reported as a failed clone
            except Exception:
                log.exception('Cloning the Git repository from %s failed',
                              self.url)
                raise CachitoError('Cloning the Git repository failed')

            try:
                # Point HEAD at the requested commit and populate the index and working tree
                repo.head.reference = repo.commit(self.ref)
                repo.head.reset(index=True, working_tree=True)
            except Exception:
                log.exception('Checking out the Git ref "%s" failed', self.ref)
                raise CachitoError(
                    'Checking out the Git repository failed. Please verify the supplied reference '
                    f'of "{self.ref}" is valid.')

            log.debug('Creating the archive at %s', self.archive_path)
            with tarfile.open(self.archive_path,
                              mode='w:gz') as bundle_archive:
                # Store the clone under the "app" directory inside the archive
                bundle_archive.add(clone_path, 'app')
Ejemplo n.º 6
0
def _get_request_or_fail(request_id: int, connect_error_msg: str, status_error_msg: str) -> dict:
    """
    Fetch the JSON data of a request from the Cachito API, failing on any error.

    Either error message may include the {exc} placeholder; it is filled in with the exception
    that was caught.

    :param request_id: ID of the request to get
    :param connect_error_msg: error message to raise if the connection fails
    :param status_error_msg: error message to raise if the response status is 4xx or 5xx
    :return: the decoded JSON body of the response
    :raises CachitoError: if the connection fails or the API returns an error response
    """
    # Imported lazily because a module-level import would be circular
    # (tasks -> requests -> tasks)
    from cachito.workers.requests import requests_session

    worker_config = get_worker_config()
    api_root = worker_config.cachito_api_url.rstrip("/")
    url = f"{api_root}/requests/{request_id}"

    try:
        response = requests_session.get(url, timeout=worker_config.cachito_api_timeout)
        response.raise_for_status()
    except requests.HTTPError as error:
        failure = status_error_msg.format(exc=error)
        log.exception(failure)
        raise CachitoError(failure)
    except requests.RequestException as error:
        failure = connect_error_msg.format(exc=error)
        log.exception(failure)
        raise CachitoError(failure)
    else:
        return response.json()
Ejemplo n.º 7
0
def _vet_local_deps(dependencies: List[dict], module_name: str,
                    allowed_patterns: List[str]):
    """
    Fail if any dependency is replaced by a local path unless the module is allowlisted.

    Also fail if the module is allowlisted but the path is absolute or outside repository.
    """
    for dep in dependencies:
        name = dep["name"]
        version = dep["version"]

        if version.startswith("."):
            log.debug(
                "Module %s wants to replace %s with a local dependency: %s",
                module_name,
                name,
                version,
            )
            if ".." in Path(version).parts:
                raise CachitoError(
                    f"Path to gomod dependency contains '..': {version}. "
                    "Cachito does not support this case.")
            _fail_unless_allowlisted(module_name, name, allowed_patterns)
        elif version.startswith("/") or PureWindowsPath(version).root:
            # This will disallow paths starting with '/', '\' or '<drive letter>:\'
            raise CachitoError(
                f"Absolute paths to gomod dependencies are not supported: {version}"
            )
Ejemplo n.º 8
0
def set_request_state(request_id, state, state_reason):
    """
    Change the state of a request through the Cachito API.

    :param int request_id: the ID of the Cachito request
    :param str state: the state to set the Cachito request to
    :param str state_reason: the state reason to set the Cachito request to
    :raise CachitoError: if the connection fails or the API responds with an error status
    """
    # Imported lazily to avoid a circular import
    from cachito.workers.requests import requests_auth_session

    worker_config = get_worker_config()
    api_root = worker_config.cachito_api_url.rstrip("/")
    url = f'{api_root}/requests/{request_id}'

    log.info(
        'Setting the state of request %d to "%s" with the reason "%s"',
        request_id,
        state,
        state_reason,
    )
    try:
        response = requests_auth_session.patch(
            url,
            json={'state': state, 'state_reason': state_reason},
            timeout=worker_config.cachito_api_timeout,
        )
    except requests.RequestException:
        error = f'The connection failed when setting the state to "{state}" on request {request_id}'
        log.exception(error)
        raise CachitoError(error)

    # Nothing more to do when the API accepted the update
    if response.ok:
        return

    log.error(
        'The worker failed to set request %d to the "%s" state. The status was %d. '
        'The text was:\n%s',
        request_id,
        state,
        response.status_code,
        response.text,
    )
    raise CachitoError(f'Setting the state to "{state}" on request {request_id} failed')
Ejemplo n.º 9
0
def update_request_with_config_files(request_id, config_files):
    """
    Attach the given configuration files to a Cachito request through the API.

    :param request_id: the ID of the Cachito request to add the configuration files to
    :param list config_files: the list of configuration files to add to the request
    :raise CachitoError: if the connection fails or the API responds with an error status
    """
    # Imported lazily to avoid a circular import
    from cachito.workers.requests import requests_auth_session

    log.info("Adding %d configuration files to the request %d", len(config_files), request_id)
    worker_config = get_worker_config()
    url = _get_request_url(request_id) + "/configuration-files"

    try:
        response = requests_auth_session.post(
            url,
            json=config_files,
            timeout=worker_config.cachito_api_timeout,
        )
    except requests.RequestException:
        error = f"The connection failed when adding configuration files to the request {request_id}"
        log.exception(error)
        raise CachitoError(error)

    # Nothing more to do when the API accepted the files
    if response.ok:
        return

    log.error(
        "The worker failed to add configuration files to the request %d. The status was %d. "
        "The text was:\n%s",
        request_id,
        response.status_code,
        response.text,
    )
    raise CachitoError(f"Adding configuration files on request {request_id} failed")
Ejemplo n.º 10
0
def update_request_with_deps(request_id,
                             deps,
                             env_vars=None,
                             pkg_manager=None,
                             packages=None):
    """
    Update the Cachito request with the resolved dependencies.

    The dependencies are sent in batches of ``cachito_deps_patch_batch_size``. The environment
    variables, package manager and packages are sent once, together with the first batch. If
    there are no dependencies but any of those values is set, a single PATCH with an empty
    dependency list is still sent so that the values are not lost.

    :param int request_id: the ID of the Cachito request
    :param list deps: the list of dependency dictionaries to record
    :param dict env_vars: mapping of environment variables to record
    :param str pkg_manager: a package manager to add to the request if auto-detection was used
    :param list packages: the list of packages that were resolved
    :raise CachitoError: if the request to the Cachito API fails
    """
    # Import this here to avoid a circular import
    from cachito.workers.requests import requests_auth_session
    config = get_worker_config()
    request_url = f'{config.cachito_api_url.rstrip("/")}/requests/{request_id}'
    batch_size = config.cachito_deps_patch_batch_size
    total = len(deps)

    log.info('Adding %d dependencies to request %d', total, request_id)

    # Values that are only sent once, alongside the first batch of dependencies
    extras = {}
    if env_vars:
        log.info('Adding environment variables to the request %d: %s',
                 request_id, env_vars)
        extras['environment_variables'] = env_vars
    if pkg_manager:
        log.info(
            'Adding the package manager "%s" to the request %d',
            pkg_manager,
            request_id,
        )
        extras['pkg_managers'] = [pkg_manager]
    if packages:
        log.info('Adding the packages "%s" to the request %d',
                 packages, request_id)
        extras['packages'] = packages

    batch_starts = list(range(0, total, batch_size))
    if not batch_starts and extras:
        # Without dependencies there would be no batch to carry the extra values, so force
        # a single request
        batch_starts = [0]

    for index in batch_starts:
        batch_upper_limit = index + batch_size
        payload = {'dependencies': deps[index:batch_upper_limit]}
        if index == 0:
            payload.update(extras)
        if total:
            log.info('Patching deps %d through %d out of %d',
                     index + 1, min(batch_upper_limit, total), total)
        else:
            log.info('Patching the request %d without any dependencies', request_id)
        try:
            rv = requests_auth_session.patch(
                request_url, json=payload, timeout=config.cachito_api_timeout)
        except requests.RequestException:
            msg = f'The connection failed when setting the dependencies on request {request_id}'
            log.exception(msg)
            raise CachitoError(msg)

        if not rv.ok:
            log.error(
                'The worker failed to set the dependencies on request %d. The status was %d. '
                'The text was:\n%s',
                request_id,
                rv.status_code,
                rv.text,
            )
            raise CachitoError(
                f'Setting the dependencies on request {request_id} failed')
Ejemplo n.º 11
0
def verify_checksum(file_path, checksum_info, chunk_size=10240):
    """
    Check that the file at the given path hashes to the expected checksum.

    :param str file_path: the path to the file to be verified
    :param ChecksumInfo checksum_info: the expected checksum information; its ``algorithm``
        and ``hexdigest`` attributes are used
    :param int chunk_size: the amount of bytes to read at a time
    :raise CachitoError: if the hash algorithm is not supported or the checksum is not as
        expected
    """
    # Only the base name is reported in error messages, not the full path
    name = os.path.basename(file_path)
    try:
        digest = hashlib.new(checksum_info.algorithm)
    except ValueError as exc:
        error = f"Cannot perform checksum on the file {name}, {exc}"
        log.exception(error)
        raise CachitoError(error)

    with open(file_path, "rb") as stream:
        # Read fixed-size chunks until an empty read signals the end of the file
        for block in iter(lambda: stream.read(chunk_size), b""):
            digest.update(block)

    actual = digest.hexdigest()
    expected = checksum_info.hexdigest
    if actual != expected:
        error = (
            f"The file {name} has an unexpected checksum value, "
            f"expected {expected} but computed {actual}"
        )
        log.error(error)
        raise CachitoError(error)
Ejemplo n.º 12
0
def resolve_npm(app_source_path, request, skip_deps=None):
    """
    Resolve and fetch the npm dependencies of the application source.

    :param str app_source_path: the full path to the application source code
    :param dict request: the Cachito request this is for
    :param set skip_deps: a set of dependency identifiers to not download because they've already
        been downloaded for this request
    :return: a dictionary that has the following keys:
        ``deps`` which is the list of dependencies,
        ``downloaded_deps`` which is a set of the dependency identifiers of the dependencies that
        were downloaded as part of this function's execution,
        ``lock_file`` which is the lock file if it was modified,
        ``lock_file_name`` is the name of the lock file that was used,
        ``package`` which is the dictionary describing the main package, and
        ``package.json`` which is the package.json file if it was modified.
    :rtype: dict
    :raises CachitoError: if fetching the dependencies fails or required files are missing
    """
    # npm-shrinkwrap.json and package-lock.json share the same format but serve slightly
    # different purposes. See the following documentation for more information:
    # https://docs.npmjs.com/files/package-lock.json.
    # The first of the two that exists is used, with npm-shrinkwrap.json taking precedence.
    candidates = ("npm-shrinkwrap.json", "package-lock.json")
    lock_file = next(
        (name for name in candidates if os.path.exists(os.path.join(app_source_path, name))),
        None,
    )
    if lock_file is None:
        raise CachitoError(
            "The npm-shrinkwrap.json or package-lock.json file must be present for the npm "
            "package manager")
    package_lock_path = os.path.join(app_source_path, lock_file)

    package_json_path = os.path.join(app_source_path, "package.json")
    if not os.path.exists(package_json_path):
        raise CachitoError(
            "The package.json file must be present for the npm package manager"
        )

    try:
        result = get_package_and_deps(package_json_path, package_lock_path)
    except KeyError:
        error = f"The lock file {lock_file} has an unexpected format"
        log.exception(error)
        raise CachitoError(error)

    result["lock_file_name"] = lock_file
    # By downloading the dependencies, it stores the tarballs in the bundle and also stages the
    # content in the npm repository for the request
    request_id = request["id"]
    proxy_repo_url = get_npm_proxy_repo_url(request_id)
    result["downloaded_deps"] = download_dependencies(
        request["id"], result["deps"], proxy_repo_url, skip_deps
    )

    # The "bundled" and "version_in_nexus" keys are implementation details that should not be
    # exposed outside of this function
    internal_keys = ("bundled", "version_in_nexus")
    for dep in result["deps"]:
        for key in internal_keys:
            dep.pop(key)

    return result
Ejemplo n.º 13
0
def search_components(in_nexus_hoster=True, **query_params):
    """
    Search for components through the Nexus REST API, following pagination.

    :param in_nexus_hoster: whether to search in the Nexus hoster instance, if one is
        available. If set to false, will search in the self hosted instance
    :param query_params: the query parameters to filter
    :return: the list of components returned by the search, accumulated over all pages
    :rtype: list<dict>
    :raise CachitoError: if the search fails
    """
    config = get_worker_config()
    if in_nexus_hoster:
        username, password = get_nexus_hoster_credentials()
        nexus_url = _get_nexus_hoster_url()
    else:
        username = config.cachito_nexus_username
        password = config.cachito_nexus_password
        nexus_url = config.cachito_nexus_url

    credentials = requests.auth.HTTPBasicAuth(username, password)
    search_url = f"{nexus_url}/service/rest/v1/search"
    # Work on a copy because the continuation token is added to the parameters below, while
    # the original query parameters are still needed unaltered for logging
    page_params = copy.deepcopy(query_params)

    log.debug(
        "Searching Nexus for components using the following query parameters: %r", query_params
    )
    components = []
    while True:
        try:
            response = nexus_requests_session.get(
                search_url,
                auth=credentials,
                params=page_params,
                timeout=config.cachito_nexus_timeout,
            )
        except requests.RequestException:
            error = "Could not connect to the Nexus instance to search for components"
            log.exception(error)
            raise CachitoError(error)

        if not response.ok:
            log.error(
                "Failed to search for components (%r) in Nexus with the status code %d and the "
                "text: %s",
                query_params,
                response.status_code,
                response.text,
            )
            raise CachitoError("Failed to search for components in Nexus")

        page = response.json()
        components.extend(page["items"])

        # Handle pagination: a falsy continuation token marks the last page
        if not page["continuationToken"]:
            return components

        log.debug("Getting the next page of Nexus component search results")
        page_params["continuationToken"] = page["continuationToken"]
Ejemplo n.º 14
0
def update_request_with_deps(request_id, deps):
    """
    Record the resolved dependencies on a Cachito request through the API.

    :param int request_id: the ID of the Cachito request
    :param list deps: the list of dependency dictionaries to record
    :raise CachitoError: if the connection fails or the API responds with an error status
    """
    # Imported lazily to avoid a circular import
    from cachito.workers.requests import requests_auth_session
    worker_config = get_worker_config()
    api_root = worker_config.cachito_api_url.rstrip("/")
    url = f'{api_root}/requests/{request_id}'

    log.info('Adding %d dependencies to request %d', len(deps), request_id)
    try:
        response = requests_auth_session.patch(url, json={'dependencies': deps}, timeout=30)
    except requests.RequestException:
        error = f'The connection failed when setting the dependencies on request {request_id}'
        log.exception(error)
        raise CachitoError(error)

    # Nothing more to do when the API accepted the update
    if response.ok:
        return

    log.error(
        'The worker failed to set the dependencies on request %d. The status was %d. '
        'The text was:\n%s',
        request_id,
        response.status_code,
        response.text,
    )
    raise CachitoError(f'Setting the dependencies on request {request_id} failed')
Ejemplo n.º 15
0
def upload_component(params, payload, to_nexus_hoster, additional_data=None):
    """
    Upload a component by posting a payload to the Nexus components endpoint.

    See https://help.sonatype.com/repomanager3/rest-and-integration-api/components-api for further
    reference.

    :param dict params: the request parameters to the upload endpoint (e.g. {"repository": NAME})
    :param dict payload: Nexus API compliant file payload
    :param bool to_nexus_hoster: Use the nexus hoster instance, if available
    :param dict additional_data: non-file Nexus API compliant file payload. This is needed for
        string params that would be passed in the "file" param. Note that python requests does not
        support sending non-files in the file payload. See
        https://issues.sonatype.org/browse/NEXUS-21946 for further reference.
    :raise CachitoError: if the upload fails
    """
    # Imported lazily to avoid a circular import
    from cachito.workers.requests import requests_session

    worker_config = get_worker_config()
    if to_nexus_hoster:
        username, password = get_nexus_hoster_credentials()
        nexus_url = _get_nexus_hoster_url()
    else:
        username = worker_config.cachito_nexus_username
        password = worker_config.cachito_nexus_password
        nexus_url = worker_config.cachito_nexus_url

    credentials = requests.auth.HTTPBasicAuth(username, password)
    upload_url = f"{nexus_url}/service/rest/v1/components"

    try:
        response = requests_session.post(
            upload_url,
            auth=credentials,
            files=payload,
            data=additional_data,
            params=params,
            timeout=worker_config.cachito_nexus_timeout,
        )
    except requests.RequestException:
        log.exception("Could not connect to the Nexus instance to upload the component")
        raise CachitoError("Could not connect to the Nexus instance to upload a component")

    # Nothing more to do when Nexus accepted the upload
    if response.ok:
        return

    log.error(
        "Failed to upload a component with the status code %d and the text: %s",
        response.status_code,
        response.text,
    )
    raise CachitoError("Failed to upload a component to Nexus")
Ejemplo n.º 16
0
    def clone_and_archive(self):
        """
        Clone the git repository and create the source archive with ``git archive``.

        The repository at ``self.url`` is cloned without a checkout into a temporary directory
        and ``self.ref`` is archived to ``self.archive_path`` with the ``app/`` prefix.

        :raises CachitoError: if cloning the repository fails or if the archive can't be created
        """
        # Both git commands are run the same way: output captured and decoded as UTF-8 text
        run_options = {
            'capture_output': True,
            'universal_newlines': True,
            'encoding': 'utf-8',
        }
        with tempfile.TemporaryDirectory(prefix='cachito-') as temp_dir:
            clone_path = os.path.join(temp_dir, 'repo')

            clone_cmd = ['git', 'clone', '-q', '--no-checkout', self.url, clone_path]
            log.debug('Cloning the repo with "%s"', ' '.join(clone_cmd))
            clone_result = subprocess.run(clone_cmd, **run_options)
            if clone_result.returncode != 0:
                log.error(
                    'Cloning the git repository with "%s" failed with: %s',
                    ' '.join(clone_cmd),
                    clone_result.stderr,
                )
                raise CachitoError('Cloning the git repository failed')

            archive_cmd = [
                'git',
                '-C',
                clone_path,
                'archive',
                '-o',
                self.archive_path,
                '--prefix=app/',
                self.ref,
            ]
            log.debug('Creating the archive with "%s"', ' '.join(archive_cmd))
            archive_result = subprocess.run(archive_cmd, **run_options)
            if archive_result.returncode == 0:
                return

            log.error(
                'Archiving the git repository with "%s" failed with: %s',
                ' '.join(archive_cmd),
                archive_result.stderr,
            )
            # Report an invalid reference specifically; anything else is an unexpected error
            if 'Not a valid object name' in archive_result.stderr:
                failure = 'An invalid reference was provided'
            else:
                failure = 'An unexpected error was encountered when downloading the source'
            # If git archive failed but still created the archive, then clean it up
            if os.path.exists(self.archive_path):
                os.remove(self.archive_path)
            raise CachitoError(failure)
Ejemplo n.º 17
0
def setup_task_logging(task_id, task, **kwargs):
    """
    Add a per-request file log handler to the root logger for the task.

    If ``cachito_request_file_logs_dir`` is set, a log handler writing to
    ``<request_id>.log`` in that directory is added to the root logger.
    If ``cachito_request_file_logs_dir`` is not set, no log handler is added.

    :param str task_id: the task ID
    :param class task: the class of the task being executed
    :param kwargs: must contain the ``args`` and ``kwargs`` the task is invoked with; they are
        used to look up the task's ``request_id`` argument
    :raises CachitoError: if the ``request_id`` argument of the task cannot be determined
    """
    config = get_worker_config()
    logs_dir = config.cachito_request_file_logs_dir
    logs_level = config.cachito_request_file_logs_level
    logs_format = config.cachito_request_file_logs_format

    # File logging is optional; without a directory there is nothing to set up
    if not logs_dir:
        return

    formatter = logging.Formatter(logs_format)
    request_id = _get_function_arg_value(
        "request_id", task.__wrapped__, kwargs["args"], kwargs["kwargs"]
    )
    if not request_id:
        raise CachitoError("Unable to get 'request_id'")

    log_path = os.path.join(logs_dir, f"{request_id}.log")
    handler = logging.FileHandler(log_path)
    handler.setLevel(logs_level)
    handler.setFormatter(formatter)
    # Apply the configured permissions to the log file
    os.chmod(log_path, config.cachito_request_file_logs_perm)
    logging.getLogger().addHandler(handler)
Ejemplo n.º 18
0
    def clone_and_archive(self, gitsubmodule=False):
        """
        Clone the git repository and create the compressed source archive.

        The repository at ``self.url`` is cloned without a checkout into a temporary directory.
        The Git HEAD is then reset, submodules are updated if requested, the repository is
        garbage collected and the archive is created from the working directory.

        :param bool gitsubmodule: a bool to determine whether git submodules need to be processed.
        :raises CachitoError: if cloning the repository fails; the helper methods called here
            may raise it as well
        """
        with tempfile.TemporaryDirectory(prefix="cachito-") as temp_dir:
            log.debug("Cloning the Git repository from %s", self.url)
            clone_path = os.path.join(temp_dir, "repo")
            try:
                repo = git.repo.Repo.clone_from(
                    self.url,
                    clone_path,
                    no_checkout=True,
                    # Don't allow git to prompt for a username if we don't have access
                    env={"GIT_TERMINAL_PROMPT": "0"},
                )
            # Exception (instead of a bare except) lets KeyboardInterrupt and SystemExit
            # propagate rather than being reported as a failed clone
            except Exception:
                log.exception("Cloning the Git repository from %s failed", self.url)
                raise CachitoError("Cloning the Git repository failed")

            self._reset_git_head(repo)

            if gitsubmodule:
                self.update_git_submodules(repo)

            # Prune unreachable objects right away before the archive is created
            repo.git.gc("--prune=now")
            self._create_archive(repo.working_dir)
Ejemplo n.º 19
0
    def update_and_archive(self, previous_archive, gitsubmodule=False):
        """
        Update the existing Git history and create a source archive.

        The previous archive is extracted into a temporary directory, ``self.ref`` is fetched
        from the remote of the repository found in the extracted ``app`` directory, the Git
        HEAD is reset, submodules are updated if requested, the repository is garbage collected
        and the archive is created from the working directory.

        :param str previous_archive: path to an archive file created before.
        :param bool gitsubmodule: a bool to determine whether git submodules need to be processed.
        :raises CachitoError: if fetching from the remote repo fails; the helper methods called
            here may raise it as well
        """
        with tempfile.TemporaryDirectory(prefix="cachito-") as temp_dir:
            with tarfile.open(previous_archive, mode="r:gz") as tar:
                tar.extractall(temp_dir)

            repo = git.Repo(os.path.join(temp_dir, "app"))
            try:
                # The reference must be specified to handle commits which are not part
                # of a branch.
                repo.remote().fetch(refspec=self.ref)
            # Exception (instead of a bare except) lets KeyboardInterrupt and SystemExit
            # propagate rather than being reported as a failed fetch
            except Exception:
                log.exception("Failed to fetch from remote %s", self.url)
                raise CachitoError("Failed to fetch from the remote Git repository")

            self._reset_git_head(repo)
            if gitsubmodule:
                self.update_git_submodules(repo)

            # Prune unreachable objects right away before the archive is created
            repo.git.gc("--prune=now")
            self._create_archive(repo.working_dir)
Ejemplo n.º 20
0
    def add_package(self, pkg_info: Dict[str, str], path: str, deps: List[Dict[str, Any]]) -> None:
        """Register a package together with its dependencies.

        :param dict[str, str] pkg_info: a mapping describing the package.
            It must have ``name``, ``type`` and ``version`` key/value pairs.
        :param str path: the path where the package is retrieved. Consult the
            ``fetch_*_source`` functions for detailed information about a package's path.
            The path is only recorded when it is not the current directory.
        :param deps: a list of dependencies the package has.
        :type deps: list[dict[str, any]]
        :raises CachitoError: if a package with the same name, type and version has been
            added already.
        """
        identity = (pkg_info["name"], pkg_info["type"], pkg_info["version"])
        if identity in self._index:
            raise CachitoError(f"Duplicate package: {pkg_info!r}")
        self._index.add(identity)

        entry = dict(zip(("name", "type", "version"), identity))
        entry["dependencies"] = deps
        if path != os.curdir:
            entry["path"] = path
        self._packages.append(entry)
Ejemplo n.º 21
0
def test_fetch_source_invalid_archive_exists(mock_clone, mock_verify, caplog,
                                             gitsubmodule):
    """Check that an existing archive that fails verification triggers a fresh clone.

    The first archive verification raises ``CachitoError`` and the second one succeeds.
    ``fetch_source`` is then expected to log that the archive is invalid, call ``glob`` on the
    package directory exactly once and clone the repository exactly once.
    """
    mock_verify.side_effect = [CachitoError("stub"), None]
    scm_git = scm.Git(url, ref)
    po = mock.patch.object
    if gitsubmodule:
        with mock.patch("cachito.workers.scm.SourcesDir") as mock_scr:
            scm_git_submodule = scm.Git(url, f"{ref}-with-submodules")
            mock_scr.return_value = scm_git_submodule.sources_dir
            with po(scm_git_submodule.sources_dir.archive_path,
                    "exists",
                    return_value=True):
                with po(scm_git_submodule.sources_dir.package_dir,
                        "glob") as glob:
                    scm_git_submodule.fetch_source(gitsubmodule)
        glob.assert_called_once()
        # The parentheses are required: without them the second literal is a stand-alone
        # expression statement and only the first half of the message gets asserted.
        msg = (
            f'The archive at "{scm_git_submodule.sources_dir.archive_path}" is '
            "invalid and will be re-created"
        )
        assert msg in caplog.text
    else:
        with po(scm_git.sources_dir.archive_path, "exists", return_value=True):
            with po(scm_git.sources_dir.package_dir, "glob") as glob:
                scm_git.fetch_source(gitsubmodule)
        glob.assert_called_once()
        msg = (
            f'The archive at "{scm_git.sources_dir.archive_path}" is invalid and will be re-created'
        )
        assert msg in caplog.text
    mock_clone.assert_called_once()
Ejemplo n.º 22
0
def fetch_app_source(url, ref, request_id_to_update=None):
    """
    Download the requested application source and place it in long-term storage.

    :param str url: the source control URL to pull the source from
    :param str ref: the source control reference
    :param int request_id_to_update: the Cachito request ID this is for; when given, the
        request's state is set to ``in_progress`` before the download starts
    :return: the ``archive_path`` attribute of the ``Git`` object used for the download
    :raises CachitoError: if the download times out or fetching the source fails
    """
    log.info('Fetching the source from "%s" at reference "%s"', url, ref)
    if request_id_to_update:
        set_request_state(
            request_id_to_update, 'in_progress', 'Fetching the application source'
        )

    try:
        # Git is the only supported source control system for now
        repo = Git(url, ref)
        repo.fetch_source()
    except requests.Timeout:
        timeout_msg = 'The connection timed out while downloading the source'
        raise CachitoError(timeout_msg)
    except CachitoError:
        log.exception(
            'Failed to fetch the source from the URL "%s" and reference "%s"', url, ref
        )
        raise
    else:
        return repo.archive_path
Ejemplo n.º 23
0
def run_cmd(cmd, params, exc_msg=None):
    """
    Run the given command with provided parameters.

    Output capturing, text mode and UTF-8 decoding are enabled unless ``params`` overrides them.
    The defaults are applied to a copy, so the ``params`` dict passed by the caller is not
    modified.

    :param iter cmd: iterable representing command to be executed
    :param dict params: keyword parameters for command execution (passed to ``subprocess.run``)
    :param str exc_msg: an optional exception message when the command fails
    :returns: the command output
    :rtype: str
    :raises CachitoError: if the command exits with a non-zero return code
    """
    # Copy first: setdefault on the caller's dict would leak these defaults back to the caller
    run_params = dict(params)
    run_params.setdefault('capture_output', True)
    run_params.setdefault('universal_newlines', True)
    run_params.setdefault('encoding', 'utf-8')

    response = subprocess.run(cmd, **run_params)

    if response.returncode != 0:
        log.error(
            'The command "%s" failed with: %s',
            ' '.join(cmd),
            response.stderr,
        )
        raise CachitoError(exc_msg or 'An unexpected error occurred')

    return response.stdout
Ejemplo n.º 24
0
def fetch_app_source(url, ref, request_id, gitsubmodule=False):
    """
    Download the requested application source, archive it and unpack it for the request.

    :param str url: the source control URL to pull the source from
    :param str ref: the source control reference
    :param int request_id: the Cachito request ID this is for
    :param bool gitsubmodule: a bool to determine whether git submodules need to be processed.
    :raises CachitoError: if the download times out or fetching the source fails
    """
    log.info('Fetching the source from "%s" at reference "%s"', url, ref)
    set_request_state(request_id, "in_progress", "Fetching the application source")

    try:
        # Git is the only supported source control system for now
        repo = Git(url, ref)
        repo.fetch_source(gitsubmodule=gitsubmodule)
    except requests.Timeout:
        timeout_msg = "The connection timed out while downloading the source"
        raise CachitoError(timeout_msg)
    except CachitoError:
        log.exception(
            'Failed to fetch the source from the URL "%s" and reference "%s"', url, ref
        )
        raise

    # The archive is unpacked into the request's bundle directory right away because some
    # package managers may add dependency replacements, which require edits to source files.
    # The extracted content eventually ends up in the bundle the user downloads.
    bundle_dir = RequestBundleDir(request_id)
    archive_path = repo.sources_dir.archive_path
    log.debug("Extracting %s to %s", archive_path, bundle_dir)
    shutil.unpack_archive(str(archive_path), str(bundle_dir))
    _enforce_sandbox(bundle_dir.source_root_dir)
Ejemplo n.º 25
0
def run_download_cmd(cmd: Iterable[str], params: Dict[str, str]) -> str:
    """Run a gomod command that downloads dependencies, retrying when it fails.

    Download commands may fail due to network errors (go is bad at retrying), so the whole
    command is retried up to the configured ``cachito_gomod_download_max_tries`` times.

    The same cache directory is reused between retries, so Go does not have to download the
    same dependency twice. The backoff is exponential without jitter: the wait between tries is
    1s -> 2s -> 4s -> ...

    :param cmd: the command to run
    :param params: the parameters forwarded to ``run_gomod_cmd``
    :return: the value returned by ``run_gomod_cmd``
    :raises CachitoError: if the command still fails after the last try
    """
    max_tries = get_worker_config().cachito_gomod_download_max_tries

    def run_go(_cmd, _params) -> str:
        log.debug(f"Running {_cmd}")
        return run_gomod_cmd(_cmd, _params)

    retrying = backoff.on_exception(
        backoff.expo,
        CachitoCalledProcessError,
        jitter=None,  # keep the backoff deterministic
        max_tries=max_tries,
        logger=log,
    )
    run_with_retries = retrying(run_go)

    try:
        return run_with_retries(cmd, params)
    except CachitoCalledProcessError:
        raise CachitoError(
            f"Processing gomod dependencies failed. Cachito tried the {' '.join(cmd)} command "
            f"{max_tries} times. This may indicate a problem with your repository or Cachito itself."
        )
Ejemplo n.º 26
0
def mark_as_stale(request_id):
    """
    Flag the given request as `stale` through the Cachito API.

    A non-OK response from the API is only logged; it does not raise. ``auth_session``,
    ``config`` and ``payload`` are not defined in this function and must be provided by the
    surrounding scope.

    :param int request_id: request ID identified as stale
    :raise CachitoError: if the connection to the Cachito API fails
    """
    log.info("Setting state of request %d to `stale`", request_id)
    request_url = f'{config.cachito_api_url.rstrip("/")}/requests/{request_id}'
    try:
        response = auth_session.patch(
            request_url, json=payload, timeout=config.cachito_api_timeout
        )
    except requests.RequestException:
        error = f"The connection failed when setting the `stale` state on request {request_id}"
        log.exception(error)
        raise CachitoError(error)

    if response.ok:
        return

    log.error(
        "Failed to set the `stale` state on request %d. The status was %d. "
        "The text was:\n%s",
        request_id,
        response.status_code,
        response.text,
    )
Ejemplo n.º 27
0
def finalize_nexus_for_js_request(repo_name, username):
    """
    Run the Nexus script that makes the request's npm repository ready for consumption.

    A random password is generated and handed to the ``js_after_content_staged`` Nexus script
    together with the repository name and the username.

    :param str repo_name: the name of the repository for the request for this package manager
    :param str username: the username of the user to be created for the request for this package
        manager
    :return: the password of the Nexus user that has access to the request's npm repository
    :rtype: str
    :raise CachitoError: if the script execution fails
    """
    script_name = "js_after_content_staged"

    # Each byte becomes two hex characters, so 12-16 bytes give a 24-32 character password
    n_bytes = random.randint(12, 16)
    password = secrets.token_hex(n_bytes)
    script_args = dict(password=password, repository_name=repo_name, username=username)

    try:
        nexus.execute_script(script_name, script_args)
    except NexusScriptError:
        log.exception("Failed to execute the script %s", script_name)
        raise CachitoError(
            "Failed to configure Nexus to allow the request's npm repository to be ready for "
            "consumption"
        )

    return password
Ejemplo n.º 28
0
def run_cmd(cmd, params, exc_msg=None):
    """
    Run the given command with provided parameters.

    Output capturing, text mode, UTF-8 decoding and the configured subprocess timeout are applied
    unless ``params`` overrides them. The defaults are applied to a copy, so the ``params`` dict
    passed by the caller is not modified.

    :param iter cmd: iterable representing command to be executed
    :param dict params: keyword parameters for command execution (passed to ``subprocess.run``)
    :param str exc_msg: an optional exception message when the command fails
    :returns: the command output
    :rtype: str
    :raises CachitoError: if the command times out
    :raises CachitoCalledProcessError: if the command exits with a non-zero return code
    """
    # Copy first: setdefault on the caller's dict would leak these defaults back to the caller
    run_params = dict(params)
    run_params.setdefault("capture_output", True)
    run_params.setdefault("universal_newlines", True)
    run_params.setdefault("encoding", "utf-8")

    conf = get_worker_config()
    run_params.setdefault("timeout", conf.cachito_subprocess_timeout)

    try:
        response = subprocess.run(cmd, **run_params)  # nosec
    except subprocess.TimeoutExpired as e:
        # Chain explicitly so the timeout is reported as the direct cause
        raise CachitoError(str(e)) from e

    if response.returncode != 0:
        log.error('The command "%s" failed with: %s', " ".join(cmd),
                  response.stderr)
        raise CachitoCalledProcessError(
            exc_msg or "An unexpected error occurred", response.returncode)

    return response.stdout
Ejemplo n.º 29
0
 def add_status(service_name, ok, reason):
     """Append a service's status to ``services``, or raise if it is down and ``short`` is set.

     ``short`` and ``services`` are not defined in this function; presumably they come from
     the enclosing scope.

     :param service_name: the name of the service
     :param ok: whether the service is available
     :param reason: why the service is unavailable; only recorded when ``ok`` is false
     :raises CachitoError: if ``short`` is truthy and the service is not ok
     """
     if ok:
         entry = {"name": service_name, "ok": ok}
     else:
         if short:
             raise CachitoError(f"{service_name} unavailable: {reason}")
         entry = {"name": service_name, "ok": ok, "reason": reason}
     services.append(entry)
Ejemplo n.º 30
0
def fetch_app_source(url, ref, request_id):
    """
    Fetch the application source code that was requested and put it in long-term storage.

    The request's state is set to ``in_progress`` first. After the source is fetched, its archive
    is extracted into the request's bundle directory, which is created if it does not exist.

    :param str url: the source control URL to pull the source from
    :param str ref: the source control reference
    :param int request_id: the Cachito request ID this is for
    :raises CachitoError: if the download times out or fetching the source fails
    """
    log.info('Fetching the source from "%s" at reference "%s"', url, ref)
    set_request_state(request_id, 'in_progress',
                      'Fetching the application source')
    try:
        # Default to Git for now
        scm = Git(url, ref)
        scm.fetch_source()
    except requests.Timeout:
        # Report the timeout as a CachitoError with a fixed message
        raise CachitoError(
            'The connection timed out while downloading the source')
    except CachitoError:
        # Log the failure with the URL and reference, then let the original error propagate
        log.exception(
            'Failed to fetch the source from the URL "%s" and reference "%s"',
            url, ref)
        raise

    # Extract the archive contents to the temporary directory of where the bundle is being created.
    # This will eventually end up in the bundle the user downloads. This is extracted now since
    # some package managers may add dependency replacements, which require edits to source files.
    request_bundle_dir = get_request_bundle_dir(request_id)
    if not os.path.exists(request_bundle_dir):
        log.debug('Creating %s', request_bundle_dir)
        # exist_ok=True keeps this from failing if the directory appears after the check above
        os.makedirs(request_bundle_dir, exist_ok=True)
    log.debug('Extracting %s to %s', scm.archive_path, request_bundle_dir)
    extract_app_src(scm.archive_path, request_bundle_dir)