Esempio n. 1
0
def add_request_config_files(request_id):
    """
    Attach base64 configuration files to an existing request.

    The request body must be a JSON array. Every element is validated with
    ``ConfigFileBase64.validate_json`` and then associated with the request, unless the
    resulting object is already associated with it.

    :param int request_id: the value of the request ID
    :return: an empty body together with the 204 status code
    :rtype: tuple(str, int)
    :raise NotFound: if the request is not found
    :raise ValidationError: if the JSON is invalid
    """
    payload = flask.request.get_json()
    if not isinstance(payload, list):
        raise ValidationError("The input data must be a JSON array")

    request = Request.query.get_or_404(request_id)
    logger = flask.current_app.logger
    file_count = len(payload)
    logger.info("Adding %d configuration files to the request %d", file_count, request.id)

    for entry in payload:
        ConfigFileBase64.validate_json(entry)
        stored_file = ConfigFileBase64.get_or_create(entry["path"], entry["content"])
        # Skip files that are already associated with this request
        if stored_file in request.config_files_base64:
            continue
        request.config_files_base64.append(stored_file)

    if current_user.is_authenticated:
        logger.info(
            "The user %s added %d configuration files to request %d",
            current_user.username,
            file_count,
            request.id,
        )
    else:
        logger.info(
            "An anonymous user added %d configuration files to request %d", file_count, request.id
        )

    db.session.commit()
    return "", 204
Esempio n. 2
0
def get_requests():
    """
    Return one page of requests as JSON, ordered by descending request ID.

    The query parameters read here are ``state`` (optional filter on the request state),
    ``verbose`` and ``per_page``.

    :return: a JSON response with the ``items`` and ``meta`` keys
    :rtype: flask.Response
    :raise ValidationError: if ``state`` is not one of the known request state names
    """
    args = flask.request.args
    # An absent or empty "state" means that no state filter is applied
    state = args.get("state")
    # The verbose flag defaults to False
    verbose = str_to_bool(args.get("verbose", False))
    max_per_page = flask.current_app.config["CACHITO_MAX_PER_PAGE"]

    query = Request.query.order_by(Request.id.desc())
    if state:
        if state not in RequestStateMapping.get_state_names():
            states = ", ".join(RequestStateMapping.get_state_names())
            raise ValidationError(
                f"{state} is not a valid request state. Valid states are: {states}"
            )
        state_int = RequestStateMapping.__members__[state].value
        query = query.join(
            RequestState, Request.request_state_id == RequestState.id
        ).filter(RequestState.state == state_int)

    # Fall back to the default page size when "per_page" is not an integer
    try:
        per_page = int(args.get("per_page", 10))
    except ValueError:
        per_page = 10

    # The call to `paginate` will inspect the current HTTP request for the
    # pagination parameters `page` and `per_page`.
    page = query.paginate(per_page=per_page, max_per_page=max_per_page)

    # Only the filters that are actually in effect are handed to the pagination metadata
    query_params = {
        key: value for key, value in (("state", state), ("verbose", verbose)) if value
    }
    items = [request.to_json(verbose=verbose) for request in page.items]
    meta = pagination_metadata(page, **query_params)
    return flask.jsonify({"items": items, "meta": meta})
Esempio n. 3
0
def _validate_gitsubmodule_exclusivity(pkg_manager_paths, mutually_exclusive):
    """
    Reject non-root packages of package managers that are exclusive with git-submodule.

    A package manager is only checked when it forms a pair with "git-submodule" (in either
    order) in ``mutually_exclusive``. Such a package manager may only use the "." path.

    :param dict pkg_manager_paths: mapping of package managers and their paths in a request
    :param set mutually_exclusive: set of pairs of mutually exclusive package managers
    :raises ValidationError: if any package manager conflicts with git-submodule
    """
    submodule = "git-submodule"

    for pkg_manager, paths in pkg_manager_paths.items():
        is_exclusive = (
            (pkg_manager, submodule) in mutually_exclusive
            or (submodule, pkg_manager) in mutually_exclusive
        )
        # Only the repository root is allowed for an exclusive package manager
        if is_exclusive and not all(path == "." for path in paths):
            raise ValidationError(
                f"Cannot process non-root packages with '{pkg_manager}' "
                "when 'git-submodule' is also set.\nSolution: remove the git submodules "
                "and instead process the upstream repos (corresponding to the submodules) "
                "directly. Each repo should be processed in a separate request."
            )
Esempio n. 4
0
def _enforce_sandbox(repo_root):
    """
    Check that there are no symlinks that try to leave the cloned repository.

    Every directory and file name reported by ``os.walk`` is resolved, and the resolved
    path must be located below the resolved repository root.

    :param (str | Path) repo_root: absolute path to root of cloned repository
    :raises ValidationError: if any symlink points outside of cloned repository
    """
    # The entries are compared after being resolved, so the root must be resolved as well.
    # Otherwise a root that is reached through a symlinked directory would make every
    # entry look like it escapes the repository.
    resolved_root = Path(repo_root).resolve()

    for dirpath, subdirs, files in os.walk(repo_root):
        dirpath = Path(dirpath)

        for entry in subdirs + files:
            full_path = dirpath / entry
            real_path = full_path.resolve()
            try:
                real_path.relative_to(resolved_root)
            except ValueError:
                # Unlike the real path, the full path is always relative to the root
                relative_path = str(full_path.relative_to(repo_root))
                raise ValidationError(
                    f"The destination of {relative_path!r} is outside of cloned repository"
                )
Esempio n. 5
0
    def get_pkg_managers(cls, pkg_managers):
        """
        Validate the input package managers and return their corresponding database objects.

        Duplicate names in the input are ignored.

        :param list pkg_managers: the list of package manager names to retrieve
        :return: a list of valid PackageManager objects
        :rtype: list
        :raise ValidationError: if one of the input package managers is invalid
        """
        pkg_managers = set(pkg_managers)
        found_pkg_managers = cls.query.filter(
            PackageManager.name.in_(pkg_managers)).all()
        # Fewer rows than unique names means that at least one name is unknown
        if len(pkg_managers) != len(found_pkg_managers):
            found_pkg_managers_names = {
                pkg_manager.name for pkg_manager in found_pkg_managers}
            invalid_pkg_managers = pkg_managers - found_pkg_managers_names
            # Sort the names so that the error message is deterministic
            raise ValidationError(
                'The following package managers are invalid: {}'.format(
                    ', '.join(sorted(invalid_pkg_managers))))

        return found_pkg_managers
Esempio n. 6
0
    def _assert(self, check_presence: Callable[[Path], bool], path: str,
                expect: bool, err_template: str):
        """
        Verify that a file or directory is present (or absent) as expected.

        ``path`` is joined to the package directory, ``check_presence()`` is called on the
        result and its return value is compared with ``expect``.

        :param (Path) -> bool check_presence: method to check file presence, e.g. Path.is_file
        :param str path: relative path to file/directory from root of package directory
        :param bool expect: expect the file/directory to be present?
        :param str err_template: error message which may contain {relpath} as a placeholder
        :raises ValidationError: if the assertion fails
        """
        target = self._pkg_dir / path
        if check_presence(target) == expect:
            return

        # The error message shows the path relative to the root directory
        err_msg = err_template.format(relpath=target.relative_to(self._root_dir))
        raise ValidationError(
            f"File check failed for {self._pkg_manager}: {err_msg}")
Esempio n. 7
0
    def add_dependency(self, dependency, replaced_dependency=None):
        """
        Link a dependency to this request unless the link already exists.

        Use this instead of ``request.dependencies.append`` so that the dependency being
        replaced can be recorded with the ``replaced_dependency`` keyword argument.

        The new association is only added to the database session; it is not committed here.

        :param Dependency dependency: a Dependency object
        :param Dependency replaced_dependency: an optional Dependency object to mark as being
            replaced by the input dependency for this request
        :raises ValidationError: if the dependency is already associated with the request, but
            replaced_dependency is different than what is already associated
        """
        # An unset ID means that the object was just created and is not part of the
        # database's transaction buffer yet.
        has_unsaved_object = not dependency.id or (
            replaced_dependency and not replaced_dependency.id)
        if has_unsaved_object:
            # Flushing sends the changes queued up in SQLAlchemy to the database's
            # transaction buffer, which generates the IDs needed for the mapping below.
            db.session.flush()

        existing = RequestDependency.query.filter_by(
            request_id=self.id, dependency_id=dependency.id).first()

        if not existing:
            new_mapping = RequestDependency(request_id=self.id,
                                            dependency_id=dependency.id)
            if replaced_dependency:
                new_mapping.replaced_dependency_id = replaced_dependency.id
            db.session.add(new_mapping)
            return

        # The association exists already; it is only acceptable with the same replacement
        requested_replacement_id = getattr(replaced_dependency, 'id', None)
        if existing.replaced_dependency_id != requested_replacement_id:
            raise ValidationError(
                f'The dependency {dependency.to_json()} can\'t have a new replacement set'
            )
Esempio n. 8
0
def _validate_package_manager_exclusivity(pkg_manager_names, package_configs,
                                          mutually_exclusive):
    """
    Ensure that no package gets processed by two or more mutually exclusive package managers.

    Note: git-submodule is a special case, because we always fetch all submodules. Therefore
    we do not know which subpaths are actually submodules prior to processing the request, and
    we have to assume that any non-root path is a submodule.

    :param list pkg_manager_names: the list of package manager names for the request
    :param dict package_configs: the "packages" parameter in a request
    :param list mutually_exclusive: list of pairs of mutually exclusive package managers
    :raises ValidationError: if the package configuration has conflicting paths (even implicitly)
    """
    # Turn the pairs into tuples so that they can be looked up in a set
    mutually_exclusive = {(left, right) for left, right in mutually_exclusive}

    # Collect the normalized paths of every package manager other than git-submodule. A
    # package manager without an explicit configuration is treated as using the "." path.
    pkg_manager_paths = {}
    for name in pkg_manager_names:
        if name == "git-submodule":
            continue
        configs = package_configs.get(name, [{}])
        pkg_manager_paths[name] = {
            os.path.normpath(config.get("path", ".")) for config in configs
        }

    if "git-submodule" in pkg_manager_names:
        _validate_gitsubmodule_exclusivity(pkg_manager_paths,
                                           mutually_exclusive)

    # Check all package manager pairs
    for a, b in itertools.combinations(pkg_manager_paths, 2):
        if (a, b) not in mutually_exclusive and (b, a) not in mutually_exclusive:
            continue

        conflicting_paths = pkg_manager_paths[a].intersection(pkg_manager_paths[b])
        if not conflicting_paths:
            continue

        raise ValidationError(
            f"The following paths cannot be processed by both '{a}' and '{b}': "
            f"{', '.join(sorted(conflicting_paths))}")
Esempio n. 9
0
def download_archive(request_id):
    """
    Build the source code archive of a completed request and send it to the client.

    :param int request_id: the value of the request ID
    :return: a Flask send_file response
    :rtype: flask.Response
    :raise NotFound: if the request is not found
    :raise ValidationError: if the request is not in the "complete" state
    """
    request = Request.query.get_or_404(request_id)
    if request.last_state.state_name != 'complete':
        raise ValidationError(
            'The request must be in the "complete" state before downloading the archive')

    config = flask.current_app.config
    shared_dir = config['CACHITO_SHARED_DIR']
    wait_timeout = config['CACHITO_WAIT_TIMEOUT']

    with tempfile.TemporaryDirectory(prefix='cachito-', dir=shared_dir) as temp_dir:
        # Although the cachito_shared_dir volume is required to be the same between celery
        # workers and the API, they may be mounted at different locations. Use relative
        # paths to agree on data location within the shared volume.
        temp_dir_name = os.path.basename(temp_dir)
        deps_path = os.path.join(temp_dir_name, 'deps')
        bundle_path = os.path.join(temp_dir_name, 'bundle.tar.gz')
        # Only the API side needs the absolute location, to read the finished archive
        bundle_location = os.path.join(shared_dir, bundle_path)

        # Fetch the sources, fetch the gomod dependencies and assemble the archive as a chain
        fetch_source = tasks.fetch_app_source.s(request.repo, request.ref)
        fetch_gomod = tasks.fetch_gomod_source.s(copy_cache_to=deps_path)
        assemble = tasks.assemble_source_code_archive.s(
            deps_path=deps_path, bundle_archive_path=bundle_path)
        chain(fetch_source, fetch_gomod, assemble).delay().wait(timeout=wait_timeout)

        return flask.send_file(bundle_location, mimetype='application/gzip')
Esempio n. 10
0
    def validate_json(cls, name, info):
        """
        Check that an environment variable definition is well-formed.

        :param str name: the name of the environment variable
        :param dict info: the description of the environment variable. Must include "value" and
            "kind" attributes
        :raises ValidationError: if the environment variable is invalid
        """
        if not isinstance(name, str):
            raise ValidationError(
                "The name of environment variables must be a string")
        if not isinstance(info, dict):
            raise ValidationError(
                "The info of environment variables must be an object")

        # The info object must contain exactly these keys
        required_keys = {"value", "kind"}
        provided_keys = set(info)

        missing_keys = required_keys.difference(provided_keys)
        if missing_keys:
            raise ValidationError(
                "The following keys must be set in the info of the environment variables: "
                f"{', '.join(sorted(missing_keys))}")

        invalid_keys = provided_keys.difference(required_keys)
        if invalid_keys:
            raise ValidationError(
                "The following keys are not allowed in the info of the environment "
                f"variables: {', '.join(sorted(invalid_keys))}")

        value, kind = info["value"], info["kind"]
        if not isinstance(value, str):
            raise ValidationError(
                "The value of environment variables must be a string")
        if not isinstance(kind, str):
            raise ValidationError(
                "The kind of environment variables must be a string")
        if kind not in cls.VALID_KINDS:
            raise ValidationError(
                f"The environment variable kind, {kind}, is not supported")
Esempio n. 11
0
def patch_request(request_id):
    """
    Modify the given request.

    The accepted keys are "dependencies", "environment_variables", "state" and
    "state_reason". The "state" and "state_reason" keys must be supplied together.

    After the changes are committed, the bundle archive is deleted when the new state is
    "stale", and the temporary bundle files are deleted when the new state is "complete" or
    "failed". A failure to delete those files is logged and does not fail the API call.

    :param int request_id: the request ID from the URL
    :return: a Flask JSON response and the 200 status code
    :rtype: tuple(flask.Response, int)
    :raise NotFound: if the request is not found
    :raise Unauthorized: if the authenticated user is not one of the Cachito workers
    :raise ValidationError: if the JSON is invalid
    """
    # Convert the allowed users to lower-case since they are stored in the database as lower-case
    # for consistency
    allowed_users = [
        user.lower()
        for user in flask.current_app.config['CACHITO_WORKER_USERNAMES']
    ]
    # current_user.is_authenticated is only ever False when auth is disabled
    if current_user.is_authenticated and current_user.username not in allowed_users:
        raise Unauthorized(
            'This API endpoint is restricted to Cachito workers')

    payload = flask.request.get_json()
    if not isinstance(payload, dict):
        raise ValidationError('The input data must be a JSON object')

    if not payload:
        raise ValidationError(
            'At least one key must be specified to update the request')

    valid_keys = {
        'dependencies', 'environment_variables', 'state', 'state_reason'
    }
    invalid_keys = set(payload.keys()) - valid_keys
    if invalid_keys:
        # Sort the keys so that the error message is deterministic
        raise ValidationError('The following keys are not allowed: {}'.format(
            ', '.join(sorted(invalid_keys))))

    for key, value in payload.items():
        if key == 'dependencies':
            if not isinstance(value, list):
                raise ValidationError(
                    'The value for "dependencies" must be an array')
            for dep in value:
                Dependency.validate_json(dep)
        elif key == 'environment_variables':
            if not isinstance(value, dict):
                raise ValidationError(
                    'The value for "{}" must be an object'.format(key))
            for env_var_name, env_var_value in value.items():
                EnvironmentVariable.validate_json(env_var_name, env_var_value)
        elif not isinstance(value, str):
            # The remaining keys ("state" and "state_reason") must be strings
            raise ValidationError(
                'The value for "{}" must be a string'.format(key))

    if 'state' in payload and 'state_reason' not in payload:
        raise ValidationError(
            'The "state_reason" key is required when "state" is supplied')
    elif 'state_reason' in payload and 'state' not in payload:
        raise ValidationError(
            'The "state" key is required when "state_reason" is supplied')

    request = Request.query.get_or_404(request_id)
    delete_bundle = False
    delete_bundle_temp = False
    if 'state' in payload and 'state_reason' in payload:
        last_state = request.last_state
        new_state = payload['state']
        delete_bundle = new_state == 'stale'
        delete_bundle_temp = new_state in ('complete', 'failed')
        new_state_reason = payload['state_reason']
        # This is to protect against a Celery task getting executed twice and setting the
        # state each time
        if last_state.state_name == new_state and last_state.state_reason == new_state_reason:
            flask.current_app.logger.info(
                'Not adding a new state since it matches the last state')
        else:
            request.add_state(new_state, new_state_reason)

    if 'dependencies' in payload:
        for dep in payload['dependencies']:
            # Reuse the dependency if it is already in the database
            dep_obj = Dependency.query.filter_by(**dep).first()
            if not dep_obj:
                dep_obj = Dependency.from_json(dep)
                db.session.add(dep_obj)

            if dep_obj not in request.dependencies:
                request.dependencies.append(dep_obj)

    for name, value in payload.get('environment_variables', {}).items():
        # Reuse the environment variable if it is already in the database
        env_var_obj = EnvironmentVariable.query.filter_by(name=name,
                                                          value=value).first()
        if not env_var_obj:
            env_var_obj = EnvironmentVariable.from_json(name, value)
            db.session.add(env_var_obj)

        if env_var_obj not in request.environment_variables:
            request.environment_variables.append(env_var_obj)

    db.session.commit()
    if delete_bundle and os.path.exists(request.bundle_archive):
        flask.current_app.logger.info('Deleting the bundle archive %s',
                                      request.bundle_archive)
        # The cleanup is best-effort, but KeyboardInterrupt/SystemExit must not be swallowed
        try:
            os.remove(request.bundle_archive)
        except Exception:
            flask.current_app.logger.exception(
                'Failed to delete the bundle archive %s',
                request.bundle_archive)

    if delete_bundle_temp and os.path.exists(request.bundle_temp_files):
        flask.current_app.logger.debug(
            'Deleting the temporary files used to create the bundle at %s',
            request.bundle_temp_files,
        )
        # The cleanup is best-effort, but KeyboardInterrupt/SystemExit must not be swallowed
        try:
            shutil.rmtree(request.bundle_temp_files)
        except Exception:
            flask.current_app.logger.exception(
                'Failed to delete the temporary files at %s',
                request.bundle_temp_files)

    if current_user.is_authenticated:
        flask.current_app.logger.info('The user %s patched request %d',
                                      current_user.username, request.id)
    else:
        flask.current_app.logger.info('An anonymous user patched request %d',
                                      request.id)

    return flask.jsonify(request.to_json()), 200
Esempio n. 12
0
def create_request():
    """
    Submit a request to resolve and cache the given source code and its dependencies.

    The request is committed to the database and a chain of Celery tasks is scheduled to
    process it.

    :param str repo: the URL to the SCM repository
    :param str ref: the SCM reference to fetch
    :param list<str> pkg_managers: list of package managers to be used for resolving dependencies
    :param list<str> flags: list of flag names
    :return: a Flask JSON response and the 201 status code
    :rtype: tuple(flask.Response, int)
    :raise ValidationError: if required parameters are not supplied
    """
    payload = flask.request.get_json()
    if not isinstance(payload, dict):
        raise ValidationError('The input data must be a JSON object')

    request = Request.from_json(payload)
    # Match the whole value: an unanchored match would also accept values that merely start
    # with 40 hex characters. The type check keeps a non-string "ref" from raising TypeError.
    if not isinstance(request.ref, str) or not re.fullmatch(r'[a-f0-9]{40}', request.ref):
        raise ValidationError(
            'The "ref" parameter must be a 40 character hex string')
    db.session.add(request)
    db.session.commit()

    if current_user.is_authenticated:
        flask.current_app.logger.info('The user %s submitted request %d',
                                      current_user.username, request.id)
    else:
        flask.current_app.logger.info('An anonymous user submitted request %d',
                                      request.id)

    pkg_manager_names = set(pkg_manager.name
                            for pkg_manager in request.pkg_managers)
    auto_detect = len(pkg_manager_names) == 0
    if auto_detect:
        flask.current_app.logger.info(
            'Automatic detection will be used since "pkg_managers" was empty')

    # Chain tasks
    error_callback = tasks.failed_request_callback.s(request.id)
    chain_tasks = [
        tasks.fetch_app_source.s(request.repo, request.ref,
                                 request.id).on_error(error_callback),
    ]
    if 'gomod' in pkg_manager_names or auto_detect:
        # Only the replacements meant for gomod are passed to the gomod task
        gomod_dependency_replacements = [
            dependency_replacement for dependency_replacement in payload.get(
                'dependency_replacements', [])
            if dependency_replacement['type'] == 'gomod'
        ]
        chain_tasks.append(
            tasks.fetch_gomod_source.si(
                request.id,
                auto_detect,
                gomod_dependency_replacements,
            ).on_error(error_callback))

    chain_tasks.extend([
        tasks.create_bundle_archive.si(request.id).on_error(error_callback),
        tasks.set_request_state.si(request.id, 'complete',
                                   'Completed successfully'),
    ])

    chain(chain_tasks).delay()
    flask.current_app.logger.debug('Successfully scheduled request %d',
                                   request.id)
    return flask.jsonify(request.to_json()), 201
Esempio n. 13
0
 def validate_json(cls, name, value):
     """
     Check that the value of an environment variable is a string.

     :param name: the name of the environment variable (not inspected here)
     :param value: the value of the environment variable
     :raises ValidationError: if the value is not a string
     """
     if isinstance(value, str):
         return

     raise ValidationError(
         'The value of environment variables must be a string')
Esempio n. 14
0
    def from_json(cls, kwargs):
        """
        Create a new request object from the JSON payload of an API call.

        The input is validated first. The returned object has the "in_progress" state added
        through ``add_state``. This method does not commit; it only flushes the session when
        the ID of a newly created user is needed.

        :param dict kwargs: the JSON parameters of the new request
        :return: the new request object
        :raise ValidationError: if the parameters are missing, unknown or invalid
        :raise Forbidden: if the current user may not create a request on behalf of another user
        """
        # Validate all required parameters are present
        required_params = {'repo', 'ref'}
        optional_params = {
            'dependency_replacements', 'flags', 'pkg_managers', 'user'
        }
        provided_params = set(kwargs.keys())
        missing_params = required_params - provided_params
        if missing_params:
            # The names in the error messages are sorted so that the messages are deterministic
            raise ValidationError('Missing required parameter(s): {}'.format(
                ', '.join(sorted(missing_params))))

        # Don't allow the user to set arbitrary columns or relationships
        invalid_params = provided_params - required_params - optional_params
        if invalid_params:
            raise ValidationError(
                'The following parameters are invalid: {}'.format(
                    ', '.join(sorted(invalid_params))))

        # Work on a copy so that the caller's payload is not modified by the pops below
        request_kwargs = deepcopy(kwargs)

        # Validate package managers are correctly provided
        pkg_managers_names = request_kwargs.pop('pkg_managers', None)
        # If no package managers are specified, then Cachito will detect them automatically
        if pkg_managers_names:
            pkg_managers = PackageManager.get_pkg_managers(pkg_managers_names)
            request_kwargs['pkg_managers'] = pkg_managers

        flag_names = request_kwargs.pop('flags', None)
        if flag_names:
            flag_names = set(flag_names)
            found_flags = (Flag.query.filter(Flag.name.in_(flag_names)).filter(
                Flag.active).all())

            # Fewer rows than unique names means that a flag is unknown or inactive
            if len(flag_names) != len(found_flags):
                found_flag_names = set(flag.name for flag in found_flags)
                invalid_flags = flag_names - found_flag_names
                raise ValidationError('Invalid/Inactive flag(s): {}'.format(
                    ', '.join(sorted(invalid_flags))))

            request_kwargs['flags'] = found_flags

        # The replacements are only validated here; they are not passed to the constructor
        dependency_replacements = request_kwargs.pop('dependency_replacements',
                                                     [])
        if not isinstance(dependency_replacements, list):
            raise ValidationError('"dependency_replacements" must be an array')

        for dependency_replacement in dependency_replacements:
            Dependency.validate_replacement_json(dependency_replacement)

        submitted_for_username = request_kwargs.pop('user', None)
        # current_user.is_authenticated is only ever False when auth is disabled
        if submitted_for_username and not current_user.is_authenticated:
            raise ValidationError(
                'Cannot set "user" when authentication is disabled')
        if current_user.is_authenticated:
            if submitted_for_username:
                allowed_users = flask.current_app.config[
                    'CACHITO_USER_REPRESENTATIVES']
                if current_user.username not in allowed_users:
                    raise Forbidden(
                        'You are not authorized to create a request on behalf of another user'
                    )

                submitted_for = User.get_or_create(submitted_for_username)
                if not submitted_for.id:
                    # Send the changes queued up in SQLAlchemy to the database's transaction buffer.
                    # This will generate an ID that can be used below.
                    db.session.flush()
                request_kwargs['user_id'] = submitted_for.id
                request_kwargs['submitted_by_id'] = current_user.id
            else:
                request_kwargs['user_id'] = current_user.id
        request = cls(**request_kwargs)
        request.add_state('in_progress', 'The request was initiated')
        return request
Esempio n. 15
0
def _validate_request_package_configs(request_kwargs, pkg_managers_names):
    """
    Validate the "packages" parameter in a new request.

    The parameter must be an object that maps a package manager of the request to an array of
    non-empty configuration objects. The paths found in the configurations are validated and
    the result is finally checked for mutually exclusive package managers.

    :param dict request_kwargs: the JSON parameters of the new request
    :param list pkg_managers_names: the list of valid package manager names for the request
    :raises ValidationError: if the "packages" parameter is invalid
    """
    # Validate the custom packages configuration. For example:
    # {"packages": {"npm": [{"path": "client"}]}}
    packages_configs = request_kwargs.get("packages", {})
    if not isinstance(packages_configs, dict):
        raise ValidationError('The "packages" parameter must be an object')

    # The names in the error messages below are sorted so that the messages are deterministic
    invalid_package_managers = packages_configs.keys() - set(
        pkg_managers_names)
    if invalid_package_managers:
        raise ValidationError(
            'The following package managers in the "packages" object do not apply: '
            + ", ".join(sorted(invalid_package_managers)))

    supported_packages_configs = {"npm", "pip", "gomod", "yarn"}
    unsupported_packages_managers = packages_configs.keys(
    ) - supported_packages_configs
    if unsupported_packages_managers:
        raise ValidationError(
            'The following package managers in the "packages" object are unsupported: '
            + ", ".join(sorted(unsupported_packages_managers)))

    # Validate the values for each package manager configuration (e.g. packages.npm)
    valid_package_config_keys = {
        "npm": {"path"},
        "pip": {"path", "requirements_build_files", "requirements_files"},
        "gomod": {"path"},
        "yarn": {"path"},
    }
    for pkg_manager, packages_config in packages_configs.items():
        invalid_format_error = (
            f'The value of "packages.{pkg_manager}" must be an array of objects with the following '
            f'keys: {", ".join(sorted(valid_package_config_keys[pkg_manager]))}')
        if not isinstance(packages_config, list):
            raise ValidationError(invalid_format_error)

        for package_config in packages_config:
            # An empty object is rejected as well
            if not isinstance(package_config, dict) or not package_config:
                raise ValidationError(invalid_format_error)

            invalid_keys = package_config.keys(
            ) - valid_package_config_keys[pkg_manager]
            if invalid_keys:
                raise ValidationError(invalid_format_error)

            if package_config.get("path") is not None:
                _validate_configuration_path_value(pkg_manager, "path",
                                                   package_config["path"])

            for key in ("requirements_files", "requirements_build_files"):
                paths = package_config.get(key, [])
                # A string would be iterated character by character, and null or a number
                # would raise TypeError, so require an array explicitly
                if not isinstance(paths, list):
                    raise ValidationError(
                        f'The "{key}" value of "packages.{pkg_manager}" must be an array')
                for path in paths:
                    _validate_configuration_path_value(pkg_manager, key, path)

    _validate_package_manager_exclusivity(
        pkg_managers_names,
        packages_configs,
        flask.current_app.
        config["CACHITO_MUTUALLY_EXCLUSIVE_PACKAGE_MANAGERS"],
    )
Esempio n. 16
0
def create_request():
    """
    Submit a request to resolve and cache the given source code and its dependencies.

    The request is built from the JSON payload and committed before the requested package
    managers are checked against ``CACHITO_PACKAGE_MANAGERS`` and before the task chain is
    scheduled.

    :return: a Flask JSON response describing the new request, with the 201 status code
    :rtype: flask.Response
    :raise ValidationError: if the payload is not a JSON object, if a requested package manager
        is not enabled, or if dependency replacements are supplied for npm, pip, or yarn
    :raise CachitoError: if the task chain cannot be scheduled
    """
    payload = flask.request.get_json()
    if not isinstance(payload, dict):
        raise ValidationError("The input data must be a JSON object")

    request = Request.from_json(payload)
    db.session.add(request)
    db.session.commit()

    if current_user.is_authenticated:
        flask.current_app.logger.info(
            "The user %s submitted request %d", current_user.username, request.id
        )
    else:
        flask.current_app.logger.info("An anonymous user submitted request %d", request.id)

    pkg_manager_names = {pkg_manager.name for pkg_manager in request.pkg_managers}
    supported_pkg_managers = set(flask.current_app.config["CACHITO_PACKAGE_MANAGERS"])
    unsupported_pkg_managers = pkg_manager_names - supported_pkg_managers
    if unsupported_pkg_managers:
        # At this point, unsupported_pkg_managers would only contain valid package managers that
        # are not enabled. The names are sorted so that the message is deterministic.
        raise ValidationError(
            "The following package managers are not "
            f"enabled: {', '.join(sorted(unsupported_pkg_managers))}"
        )

    # Group the dependency replacements by the package manager ("type") they apply to
    pkg_manager_to_dep_replacements = {}
    for dependency_replacement in payload.get("dependency_replacements", []):
        pkg_manager_to_dep_replacements.setdefault(dependency_replacement["type"], []).append(
            dependency_replacement
        )

    # Only the gomod task below receives dependency replacements; reject them for the other
    # package managers that fetch dependencies. The order matches the order of the tasks.
    for name in ("npm", "pip", "yarn"):
        if name in pkg_manager_names and pkg_manager_to_dep_replacements.get(name):
            raise ValidationError(
                f"Dependency replacements are not yet supported for the {name} package manager"
            )

    # Chain tasks. Every task gets the same error callback, which is given the request ID.
    error_callback = tasks.failed_request_callback.s(request.id)
    chain_tasks = [
        tasks.fetch_app_source.s(
            request.repo, request.ref, request.id, "git-submodule" in pkg_manager_names
        ).on_error(error_callback)
    ]

    package_configs = payload.get("packages", {})
    if "gomod" in pkg_manager_names:
        chain_tasks.append(
            tasks.fetch_gomod_source.si(
                request.id,
                pkg_manager_to_dep_replacements.get("gomod", []),
                package_configs.get("gomod", []),
            ).on_error(error_callback)
        )
    if "npm" in pkg_manager_names:
        chain_tasks.append(
            tasks.fetch_npm_source.si(request.id, package_configs.get("npm", [])).on_error(
                error_callback
            )
        )
    if "pip" in pkg_manager_names:
        chain_tasks.append(
            tasks.fetch_pip_source.si(request.id, package_configs.get("pip", [])).on_error(
                error_callback
            )
        )
    if "git-submodule" in pkg_manager_names:
        chain_tasks.append(
            tasks.add_git_submodules_as_package.si(request.id).on_error(error_callback)
        )
    if "yarn" in pkg_manager_names:
        chain_tasks.append(
            tasks.fetch_yarn_source.si(request.id, package_configs.get("yarn", [])).on_error(
                error_callback
            )
        )

    chain_tasks.append(tasks.create_bundle_archive.si(request.id).on_error(error_callback))

    try:
        chain(chain_tasks).delay()
    except kombu.exceptions.OperationalError as exc:
        flask.current_app.logger.exception(
            "Failed to schedule the task for request %d. Failing the request.", request.id
        )
        error = "Failed to schedule the task to the workers. Please try again."
        request.add_state("failed", error)
        # Every other add_state call is followed by an explicit commit; do the same here so the
        # failed state is not lost when the error response ends the HTTP request.
        db.session.commit()
        raise CachitoError(error) from exc

    flask.current_app.logger.debug("Successfully scheduled request %d", request.id)
    return flask.jsonify(request.to_json()), 201
Esempio n. 17
0
    def from_json(cls, kwargs):
        """
        Build a new Request from the JSON payload of a submission.

        The payload is validated step by step (parameter names, ref, package managers, package
        configurations, flags, dependency replacements, and user) before the object is created
        and put in the "in_progress" state.

        :param dict kwargs: the dictionary representing the request
        :return: the Request object
        :rtype: Request
        :raise ValidationError: if a required parameter is missing, an unknown parameter is
            supplied, the ref is invalid, a flag is invalid or inactive, or "user" is set while
            the current user is not authenticated
        :raise Forbidden: if the current user may not submit a request on behalf of another user
        """
        required = {"repo", "ref"}
        optional = {"dependency_replacements", "flags", "packages", "pkg_managers", "user"}
        provided = set(kwargs.keys())

        # Every required parameter must be supplied
        missing = required - provided - optional
        if missing:
            raise ValidationError(f"Missing required parameter(s): {', '.join(missing)}")

        # Anything outside of the known parameters is rejected so that arbitrary columns or
        # relationships cannot be set by the user
        unknown = provided - required - optional
        if unknown:
            raise ValidationError(f"The following parameters are invalid: {', '.join(unknown)}")

        if not is_request_ref_valid(kwargs["ref"]):
            raise ValidationError('The "ref" parameter must be a 40 character hex string')

        # Work on a copy so the caller's payload is left untouched
        request_kwargs = deepcopy(kwargs)

        # Fall back to the configured default package managers when none were requested
        pkg_managers_names = request_kwargs.pop("pkg_managers", None)
        if pkg_managers_names is None:
            default_names = flask.current_app.config["CACHITO_DEFAULT_PACKAGE_MANAGERS"]
            flask.current_app.logger.debug(
                "Using the default package manager(s) (%s) on the request",
                ", ".join(default_names),
            )
            pkg_managers_names = default_names

        request_kwargs["pkg_managers"] = PackageManager.get_pkg_managers(pkg_managers_names)

        _validate_request_package_configs(request_kwargs, pkg_managers_names or [])
        # "packages" is only validated here; it is not an argument of the Request constructor
        request_kwargs.pop("packages", None)

        requested_flags = request_kwargs.pop("flags", None)
        if requested_flags:
            requested_flags = set(requested_flags)
            active_flags = (
                Flag.query.filter(Flag.name.in_(requested_flags)).filter(Flag.active).all()
            )

            # A count mismatch means at least one requested flag is unknown or inactive
            if len(requested_flags) != len(active_flags):
                active_flag_names = {flag.name for flag in active_flags}
                rejected = requested_flags - active_flag_names
                raise ValidationError(f"Invalid/Inactive flag(s): {', '.join(rejected)}")

            request_kwargs["flags"] = active_flags

        validate_dependency_replacements(request_kwargs.pop("dependency_replacements", []))

        submitted_for_username = request_kwargs.pop("user", None)
        if not current_user.is_authenticated:
            # current_user.is_authenticated is only ever False when auth is disabled
            if submitted_for_username:
                raise ValidationError('Cannot set "user" when authentication is disabled')
        elif not submitted_for_username:
            # The request belongs to whoever submitted it
            request_kwargs["user"] = current_user._get_current_object()
        else:
            # Submitting on behalf of another user is limited to the configured representatives
            representatives = flask.current_app.config["CACHITO_USER_REPRESENTATIVES"]
            if current_user.username not in representatives:
                flask.current_app.logger.error(
                    "The user %s tried to submit a request on behalf of another user, but is "
                    "not allowed",
                    current_user.username,
                )
                raise Forbidden(
                    "You are not authorized to create a request on behalf of another user"
                )

            request_kwargs["user"] = User.get_or_create(submitted_for_username)
            request_kwargs["submitted_by"] = current_user

        new_request = cls(**request_kwargs)
        new_request.add_state("in_progress", "The request was initiated")
        return new_request
Esempio n. 18
0
def patch_request(request_id):
    """
    Modify the given request.

    The payload may set a new state ("state" together with "state_reason"), add environment
    variables, and set the "packages_count" and "dependencies_count" values. Once the changes
    are committed, the bundle archive, the temporary bundle files, the request log file, and the
    Nexus content are cleaned up on a best-effort basis depending on the new state.

    :param int request_id: the request ID from the URL
    :return: an empty response body with the 200 status code
    :rtype: flask.Response
    :raise NotFound: if the request is not found
    :raise ValidationError: if the JSON is invalid
    """
    payload = flask.request.get_json()
    if not isinstance(payload, dict):
        raise ValidationError("The input data must be a JSON object")

    if not payload:
        raise ValidationError("At least one key must be specified to update the request")

    valid_keys = {
        "environment_variables",
        "state",
        "state_reason",
        "packages_count",
        "dependencies_count",
    }
    invalid_keys = set(payload.keys()) - valid_keys
    if invalid_keys:
        raise ValidationError(
            "The following keys are not allowed: {}".format(", ".join(invalid_keys))
        )

    for key, value in payload.items():
        if key == "environment_variables":
            if not isinstance(value, dict):
                raise ValidationError('The value for "{}" must be an object'.format(key))
            for env_var_name, env_var_info in value.items():
                EnvironmentVariable.validate_json(env_var_name, env_var_info)
        elif key in ("packages_count", "dependencies_count"):
            # bool is a subclass of int, so JSON true/false must be rejected explicitly
            if isinstance(value, bool) or not isinstance(value, int):
                raise ValidationError(f'The value for "{key}" must be an integer')
        elif not isinstance(value, str):
            # The remaining keys ("state" and "state_reason") must be strings
            raise ValidationError('The value for "{}" must be a string'.format(key))

    if "state" in payload and "state_reason" not in payload:
        raise ValidationError('The "state_reason" key is required when "state" is supplied')
    elif "state_reason" in payload and "state" not in payload:
        raise ValidationError('The "state" key is required when "state_reason" is supplied')

    request = Request.query.get_or_404(request_id)
    delete_bundle = False
    delete_bundle_temp = False
    cleanup_nexus = []
    delete_logs = False
    if "state" in payload and "state_reason" in payload:
        new_state = payload["state"]
        # The current state is read here, before add_state is called below
        delete_bundle = new_state == "stale" and request.state.state_name != "failed"
        if new_state in ("stale", "failed"):
            request_pkg_manager_names = {p.name for p in request.pkg_managers}
            cleanup_nexus = [
                pkg_manager
                for pkg_manager in ("npm", "pip", "yarn")
                if pkg_manager in request_pkg_manager_names
            ]
        delete_bundle_temp = new_state in ("complete", "failed")
        delete_logs = new_state == "stale"
        new_state_reason = payload["state_reason"]
        # This is to protect against a Celery task getting executed twice and setting the
        # state each time
        if request.state.state_name == new_state and request.state.state_reason == new_state_reason:
            flask.current_app.logger.info(
                "Not adding a new state since it matches the last state")
        else:
            request.add_state(new_state, new_state_reason)

    for env_var_name, env_var_info in payload.get("environment_variables", {}).items():
        # Reuse an identical environment variable if one is already stored
        env_var_obj = EnvironmentVariable.query.filter_by(
            name=env_var_name, **env_var_info).first()
        if not env_var_obj:
            env_var_obj = EnvironmentVariable.from_json(env_var_name, env_var_info)
            db.session.add(env_var_obj)

        if env_var_obj not in request.environment_variables:
            request.environment_variables.append(env_var_obj)

    for attr in ("packages_count", "dependencies_count"):
        value = payload.get(attr)
        if value is not None:
            setattr(request, attr, value)

    db.session.commit()

    bundle_dir: RequestBundleDir = RequestBundleDir(
        request.id, root=flask.current_app.config["CACHITO_BUNDLES_DIR"])

    # The cleanup steps below are best effort: a failure is logged and does not fail the PATCH.
    # Only Exception is caught so that SystemExit and KeyboardInterrupt still propagate.
    if delete_bundle and bundle_dir.bundle_archive_file.exists():
        flask.current_app.logger.info(
            "Deleting the bundle archive %s", bundle_dir.bundle_archive_file)
        try:
            bundle_dir.bundle_archive_file.unlink()
            bundle_dir.bundle_archive_checksum.unlink()
            bundle_dir.packages_data.unlink()
        except Exception:
            flask.current_app.logger.exception(
                "Failed to delete the bundle archive %s", bundle_dir.bundle_archive_file)

    if delete_bundle_temp and bundle_dir.exists():
        flask.current_app.logger.info(
            "Deleting the temporary files used to create the bundle at %s", bundle_dir)
        try:
            bundle_dir.rmtree()
        except Exception:
            flask.current_app.logger.exception(
                "Failed to delete the temporary files at %s", bundle_dir)

    if delete_logs:
        request_log_dir = flask.current_app.config["CACHITO_REQUEST_FILE_LOGS_DIR"]
        path_to_file = os.path.join(request_log_dir, f"{request_id}.log")
        try:
            os.remove(path_to_file)
        except Exception:
            flask.current_app.logger.exception(
                "Failed to delete the log file %s", path_to_file)

    for pkg_mgr in cleanup_nexus:
        flask.current_app.logger.info(
            "Cleaning up the Nexus %s content for request %d", pkg_mgr, request_id)
        # The task is looked up by name, e.g. tasks.cleanup_npm_request
        cleanup_task = getattr(tasks, f"cleanup_{pkg_mgr}_request")
        try:
            cleanup_task.delay(request_id)
        except kombu.exceptions.OperationalError:
            flask.current_app.logger.exception(
                "Failed to schedule the cleanup_%s_request task for request %d. An administrator "
                "must clean this up manually.",
                pkg_mgr,
                request.id,
            )

    if current_user.is_authenticated:
        flask.current_app.logger.info(
            "The user %s patched request %d", current_user.username, request.id)
    else:
        flask.current_app.logger.info("An anonymous user patched request %d", request.id)

    return "", 200
Esempio n. 19
0
def get_requests():
    """
    Retrieve paginated details for requests.

    The following query parameters are read: ``state``, ``repo``, ``ref``, ``pkg_manager`` (may
    be repeated; a request must have all of the given package managers), ``verbose``, and
    ``per_page``.

    :return: a Flask JSON response with the "items" and "meta" keys
    :rtype: flask.Response
    :raise ValidationError: if the state, the ref, or a package manager name is invalid
    """
    # Check if the user is filtering requests by state
    state = flask.request.args.get("state")
    # Default verbose flag to False
    verbose = str_to_bool(flask.request.args.get("verbose", False))
    max_per_page = flask.current_app.config["CACHITO_MAX_PER_PAGE"]
    # The call to `paginate` will inspect the current HTTP request for the
    # pagination parameters `page` and `per_page`.
    query = Request.query.order_by(Request.id.desc())
    if state:
        if state not in RequestStateMapping.get_state_names():
            states = ", ".join(RequestStateMapping.get_state_names())
            raise ValidationError(
                f"{state} is not a valid request state. Valid states are: {states}"
            )
        state_int = RequestStateMapping.__members__[state].value
        query = query.join(RequestState, Request.request_state_id == RequestState.id)
        query = query.filter(RequestState.state == state_int)
    repo = flask.request.args.get("repo")
    if repo:
        query = query.filter(Request.repo == repo)
    ref = flask.request.args.get("ref")
    if ref:
        if not is_request_ref_valid(ref):
            raise ValidationError(f"{ref} is not a valid ref.")
        query = query.filter(Request.ref == ref)
    pkg_managers = flask.request.args.getlist("pkg_manager")
    if pkg_managers:
        pkg_manager_ids = []
        for name in pkg_managers:
            if not name:
                # Ignore if pkg_manager= presents in the querystring
                continue
            pkg_manager: PackageManager = PackageManager.get_by_name(name)
            if pkg_manager is None:
                raise ValidationError(f"Cachito does not have package manager {name}.")
            # A repeated name must only be counted once. Otherwise the expected count in the
            # HAVING clause below is larger than the number of distinct package managers being
            # matched, and the filter would not return any request.
            if pkg_manager.id not in pkg_manager_ids:
                pkg_manager_ids.append(pkg_manager.id)
        if pkg_manager_ids:
            # Keep only the requests that are associated with every requested package manager
            query = (
                query.join(PackageManager, Request.pkg_managers)
                .filter(PackageManager.id.in_(pkg_manager_ids))
                .group_by(Request.id)
                .having(func.count(PackageManager.id) == len(pkg_manager_ids))
            )
    try:
        per_page = int(flask.request.args.get("per_page", 10))
    except ValueError:
        # Fall back to the default page size when per_page is not a number
        per_page = 10
    pagination_query = query.paginate(per_page=per_page, max_per_page=max_per_page)
    requests = pagination_query.items
    # NOTE(review): only "state" and "verbose" are forwarded to pagination_metadata; the repo,
    # ref, and pkg_manager filters are not. Confirm whether that is intended.
    query_params = {}
    if state:
        query_params["state"] = state
    if verbose:
        query_params["verbose"] = verbose
    response = {
        "items": [request.to_json(verbose=verbose) for request in requests],
        "meta": pagination_metadata(pagination_query, **query_params),
    }
    return flask.jsonify(response)
Esempio n. 20
0
def patch_request(request_id):
    """
    Modify the given request.

    The payload may set a new state ("state" together with "state_reason"), add a package
    (optionally with "package_subpath") and its dependencies, and add environment variables.
    Once the changes are committed, the bundle archive, the temporary bundle files, the request
    log file, and the Nexus content are cleaned up on a best-effort basis depending on the new
    state.

    :param int request_id: the request ID from the URL
    :return: a Flask JSON response describing the updated request, with the 200 status code
    :rtype: flask.Response
    :raise NotFound: if the request is not found
    :raise ValidationError: if the JSON is invalid
    """
    payload = flask.request.get_json()
    if not isinstance(payload, dict):
        raise ValidationError("The input data must be a JSON object")

    if not payload:
        raise ValidationError("At least one key must be specified to update the request")

    valid_keys = {
        "dependencies",
        "environment_variables",
        "package",
        "package_subpath",
        "state",
        "state_reason",
    }
    invalid_keys = set(payload.keys()) - valid_keys
    if invalid_keys:
        raise ValidationError(
            "The following keys are not allowed: {}".format(", ".join(invalid_keys))
        )

    for key, value in payload.items():
        if key == "dependencies":
            if not isinstance(value, list):
                raise ValidationError('The value for "dependencies" must be an array')
            if "package" not in payload:
                raise ValidationError(
                    'The "package" object must also be provided if the "dependencies" array is '
                    "provided")
            for dep in value:
                Dependency.validate_json(dep, for_update=True)
        elif key == "package":
            Package.validate_json(value)
        elif key == "environment_variables":
            if not isinstance(value, dict):
                raise ValidationError('The value for "{}" must be an object'.format(key))
            for env_var_name, env_var_info in value.items():
                EnvironmentVariable.validate_json(env_var_name, env_var_info)
        elif not isinstance(value, str):
            # The remaining keys ("package_subpath", "state", and "state_reason") must be strings
            raise ValidationError('The value for "{}" must be a string'.format(key))

    if "package_subpath" in payload and "package" not in payload:
        raise ValidationError(
            'The "package" object must also be provided if "package_subpath" is provided'
        )

    if "state" in payload and "state_reason" not in payload:
        raise ValidationError('The "state_reason" key is required when "state" is supplied')
    elif "state_reason" in payload and "state" not in payload:
        raise ValidationError('The "state" key is required when "state_reason" is supplied')

    request = Request.query.get_or_404(request_id)
    delete_bundle = False
    delete_bundle_temp = False
    cleanup_nexus = []
    delete_logs = False
    if "state" in payload and "state_reason" in payload:
        new_state = payload["state"]
        # The current state is read here, before add_state is called below
        delete_bundle = new_state == "stale" and request.state.state_name != "failed"
        if new_state in ("stale", "failed"):
            request_pkg_manager_names = {p.name for p in request.pkg_managers}
            cleanup_nexus = [
                pkg_manager
                for pkg_manager in ("npm", "pip", "yarn")
                if pkg_manager in request_pkg_manager_names
            ]
        delete_bundle_temp = new_state in ("complete", "failed")
        delete_logs = new_state == "stale"
        new_state_reason = payload["state_reason"]
        # This is to protect against a Celery task getting executed twice and setting the
        # state each time
        if request.state.state_name == new_state and request.state.state_reason == new_state_reason:
            flask.current_app.logger.info(
                "Not adding a new state since it matches the last state")
        else:
            request.add_state(new_state, new_state_reason)

    package_object = None
    if "package" in payload:
        package_object = Package.get_or_create(payload["package"])

        package_attrs = {}
        # The presence of "package_subpath" in payload indicates whether to modify the subpath.
        # This is only allowed when creating a new package, so when the PATCH API is used to
        # modify an existing package, the user must make sure to use the same subpath (or no
        # subpath).
        if "package_subpath" in payload:
            package_attrs["subpath"] = payload["package_subpath"]

        request.add_package(package_object, **package_attrs)

    for dep_and_replaces in payload.get("dependencies", []):
        # Copy the entry so that popping "replaces" does not modify the payload
        dep = copy.deepcopy(dep_and_replaces)
        replaces = dep.pop("replaces", None)

        dep_object = Dependency.get_or_create(dep)
        replaces_object = None
        if replaces:
            replaces_object = Dependency.get_or_create(replaces)
        request.add_dependency(package_object, dep_object, replaces_object)

    for env_var_name, env_var_info in payload.get("environment_variables", {}).items():
        # Reuse an identical environment variable if one is already stored
        env_var_obj = EnvironmentVariable.query.filter_by(
            name=env_var_name, **env_var_info).first()
        if not env_var_obj:
            env_var_obj = EnvironmentVariable.from_json(env_var_name, env_var_info)
            db.session.add(env_var_obj)

        if env_var_obj not in request.environment_variables:
            request.environment_variables.append(env_var_obj)

    db.session.commit()

    bundle_dir = RequestBundleDir(
        request.id, root=flask.current_app.config["CACHITO_BUNDLES_DIR"])

    # The cleanup steps below are best effort: a failure is logged and does not fail the PATCH.
    # Only Exception is caught so that SystemExit and KeyboardInterrupt still propagate.
    if delete_bundle and bundle_dir.bundle_archive_file.exists():
        flask.current_app.logger.info(
            "Deleting the bundle archive %s", bundle_dir.bundle_archive_file)
        try:
            bundle_dir.bundle_archive_file.unlink()
        except Exception:
            flask.current_app.logger.exception(
                "Failed to delete the bundle archive %s", bundle_dir.bundle_archive_file)

    if delete_bundle_temp and bundle_dir.exists():
        flask.current_app.logger.debug(
            "Deleting the temporary files used to create the bundle at %s", bundle_dir)
        try:
            bundle_dir.rmtree()
        except Exception:
            flask.current_app.logger.exception(
                "Failed to delete the temporary files at %s", bundle_dir)

    if delete_logs:
        request_log_dir = flask.current_app.config["CACHITO_REQUEST_FILE_LOGS_DIR"]
        path_to_file = os.path.join(request_log_dir, f"{request_id}.log")
        try:
            os.remove(path_to_file)
        except Exception:
            flask.current_app.logger.exception(
                "Failed to delete the log file %s", path_to_file)

    for pkg_mgr in cleanup_nexus:
        flask.current_app.logger.info(
            "Cleaning up the Nexus %s content for request %d", pkg_mgr, request_id)
        # The task is looked up by name, e.g. tasks.cleanup_npm_request
        cleanup_task = getattr(tasks, f"cleanup_{pkg_mgr}_request")
        try:
            cleanup_task.delay(request_id)
        except kombu.exceptions.OperationalError:
            flask.current_app.logger.exception(
                "Failed to schedule the cleanup_%s_request task for request %d. An administrator "
                "must clean this up manually.",
                pkg_mgr,
                request.id,
            )

    if current_user.is_authenticated:
        flask.current_app.logger.info(
            "The user %s patched request %d", current_user.username, request.id)
    else:
        flask.current_app.logger.info("An anonymous user patched request %d", request.id)

    return flask.jsonify(request.to_json()), 200
Esempio n. 21
0
def resolve_npm(app_source_path, request, skip_deps=None):
    """
    Resolve and download the npm dependencies of the application source.

    :param str app_source_path: the full path to the application source code
    :param dict request: the Cachito request this is for
    :param set skip_deps: a set of dependency identifiers to not download because they've already
        been downloaded for this request
    :return: a dictionary that has the following keys:
        ``deps`` which is the list of dependencies,
        ``downloaded_deps`` which is a set of the dependency identifiers of the dependencies that
        were downloaded as part of this function's execution,
        ``lock_file`` which is the lock file if it was modified,
        ``lock_file_name`` is the name of the lock file that was used,
        ``package`` which is the dictionary describing the main package, and
        ``package.json`` which is the package.json file if it was modified.
    :rtype: dict
    :raises CachitoError: if fetching the dependencies fails or required files are missing
    :raises ValidationError: if lock file does not have the correct format
    """
    # npm-shrinkwrap.json and package-lock.json share the same format but serve slightly
    # different purposes (see https://docs.npmjs.com/files/package-lock.json). The first one
    # that exists is used, with npm-shrinkwrap.json taking precedence.
    lock_file = next(
        (
            candidate
            for candidate in ("npm-shrinkwrap.json", "package-lock.json")
            if os.path.exists(os.path.join(app_source_path, candidate))
        ),
        None,
    )
    if lock_file is None:
        raise CachitoError(
            "The npm-shrinkwrap.json or package-lock.json file must be present for the npm "
            "package manager"
        )
    package_lock_path = os.path.join(app_source_path, lock_file)

    package_json_path = os.path.join(app_source_path, "package.json")
    if not os.path.exists(package_json_path):
        raise CachitoError("The package.json file must be present for the npm package manager")

    try:
        resolved = get_package_and_deps(package_json_path, package_lock_path)
    except KeyError as missing_key:
        msg = f"The lock file {lock_file} has an unexpected format (missing key: {missing_key})"
        log.exception(msg)
        raise ValidationError(msg)

    resolved["lock_file_name"] = lock_file

    # Downloading the dependencies stores the tarballs in the bundle and also stages the content
    # in the npm repository for the request
    request_id = request["id"]
    proxy_repo_url = get_npm_proxy_repo_url(request_id)
    npm_deps_dir = RequestBundleDir(request_id).npm_deps_dir
    npm_deps_dir.mkdir(exist_ok=True)
    resolved["downloaded_deps"] = download_dependencies(
        npm_deps_dir, resolved["deps"], proxy_repo_url, skip_deps
    )

    # The "bundled" and "version_in_nexus" keys are implementation details that should not be
    # exposed outside of this function, so they are stripped from every dependency
    for dep in resolved["deps"]:
        for internal_key in ("bundled", "version_in_nexus"):
            dep.pop(internal_key)

    return resolved