Exemple #1
0
def test_split_chained_sub_model_plans(
        tmp_path: pathlib.Path, simplified_nist_catalog: oscatalog.Catalog,
        keep_cwd: pathlib.Path) -> None:
    """Test for split_model method with chained sum models like catalog.metadata.parties.*."""
    # Assume we are running a command like below
    # trestle split -f catalog.json -e catalog.metadata.parties.*
    # see https://github.com/IBM/compliance-trestle/issues/172
    content_type = FileContentType.JSON

    # prepare trestle project dir with the file
    catalog_dir, catalog_file = test_utils.prepare_trestle_project_dir(
        tmp_path, content_type, simplified_nist_catalog,
        test_utils.CATALOGS_DIR)

    # read the model from file
    catalog = oscatalog.Catalog.oscal_read(catalog_file)
    element = Element(catalog)
    element_args = ['catalog.metadata.parties.*']
    element_paths = cmd_utils.parse_element_args(
        None, element_args, catalog_dir.relative_to(tmp_path))
    assert 2 == len(element_paths)

    expected_plan = Plan()

    # prepare to extract metadata and parties
    metadata_file = catalog_dir / element_paths[0].to_file_path(content_type)
    metadata_field_alias = element_paths[0].get_element_name()
    metadata = element.get_at(element_paths[0])
    meta_element = Element(metadata, metadata_field_alias)

    # extract parties
    parties_dir = catalog_dir / 'catalog/metadata/parties'
    for i, party in enumerate(meta_element.get_at(element_paths[1], False)):
        prefix = str(i).zfill(const.FILE_DIGIT_PREFIX_LENGTH)
        sub_model_actions = SplitCmd.prepare_sub_model_split_actions(
            party, parties_dir, prefix, content_type)
        expected_plan.add_actions(sub_model_actions)

    # stripped metadata
    stripped_metadata = metadata.stripped_instance(
        stripped_fields_aliases=['parties'])
    expected_plan.add_action(CreatePathAction(metadata_file))
    expected_plan.add_action(
        WriteFileAction(metadata_file,
                        Element(stripped_metadata, metadata_field_alias),
                        content_type))

    # stripped catalog
    root_file = catalog_dir / element_paths[0].to_root_path(content_type)
    remaining_root = element.get().stripped_instance(metadata_field_alias)
    expected_plan.add_action(CreatePathAction(root_file, True))
    expected_plan.add_action(
        WriteFileAction(root_file, Element(remaining_root), content_type))

    split_plan = SplitCmd.split_model(catalog, element_paths, catalog_dir,
                                      content_type, '', None)
    assert expected_plan == split_plan
Exemple #2
0
def test_subsequent_split_model(
        tmp_path: pathlib.Path,
        sample_target_def: ostarget.TargetDefinition) -> None:
    """Test subsequent split of sub models."""
    # Assume we are running a command like below
    # trestle split -f target-definition.yaml -e target-definition.metadata

    content_type = FileContentType.YAML

    # prepare trestle project dir with the file
    target_def_dir, target_def_file = test_utils.prepare_trestle_project_dir(
        tmp_path, content_type, sample_target_def, test_utils.TARGET_DEFS_DIR)

    # first split the target-def into metadata
    target_def = ostarget.TargetDefinition.oscal_read(target_def_file)
    element = Element(target_def, 'target-definition')
    element_args = ['target-definition.metadata']
    element_paths = test_utils.prepare_element_paths(target_def_dir,
                                                     element_args)
    metadata_file = target_def_dir / element_paths[0].to_file_path(
        content_type)
    metadata: ostarget.Metadata = element.get_at(element_paths[0])
    root_file = target_def_dir / element_paths[0].to_root_path(content_type)
    metadata_field_alias = element_paths[0].get_element_name()
    stripped_root = element.get().stripped_instance(
        stripped_fields_aliases=[metadata_field_alias])
    root_wrapper_alias = utils.classname_to_alias(
        stripped_root.__class__.__name__, 'json')

    first_plan = Plan()
    first_plan.add_action(CreatePathAction(metadata_file))
    first_plan.add_action(
        WriteFileAction(metadata_file, Element(metadata, metadata_field_alias),
                        content_type))
    first_plan.add_action(CreatePathAction(root_file, True))
    first_plan.add_action(
        WriteFileAction(root_file, Element(stripped_root, root_wrapper_alias),
                        content_type))
    first_plan.execute()  # this will split the files in the temp directory

    # now, prepare the expected plan to split metadta at parties
    second_plan = Plan()
    metadata_file_dir = target_def_dir / element_paths[0].to_root_path()
    metadata2 = ostarget.Metadata.oscal_read(metadata_file)
    element = Element(metadata2, metadata_field_alias)

    element_args = ['metadata.parties.*']
    element_paths = test_utils.prepare_element_paths(target_def_dir,
                                                     element_args)
    parties_dir = metadata_file_dir / element_paths[0].to_file_path()
    for i, party in enumerate(element.get_at(element_paths[0])):
        prefix = str(i).zfill(const.FILE_DIGIT_PREFIX_LENGTH)
        sub_model_actions = SplitCmd.prepare_sub_model_split_actions(
            party, parties_dir, prefix, content_type)
        second_plan.add_actions(sub_model_actions)

    # stripped metadata
    stripped_metadata = metadata2.stripped_instance(
        stripped_fields_aliases=['parties'])
    second_plan.add_action(CreatePathAction(metadata_file, True))
    second_plan.add_action(
        WriteFileAction(metadata_file,
                        Element(stripped_metadata, metadata_field_alias),
                        content_type))

    # call the split command and compare the plans
    split_plan = SplitCmd.split_model(metadata, element_paths,
                                      metadata_file_dir, content_type)
    assert second_plan == split_plan
Exemple #3
0
    def split_model_at_path_chain(cls,
                                  model_obj: OscalBaseModel,
                                  element_paths: List[ElementPath],
                                  base_dir: pathlib.Path,
                                  content_type: FileContentType,
                                  cur_path_index: int,
                                  split_plan: Plan,
                                  strip_root: bool,
                                  root_file_name: str = '') -> int:
        """Recursively split the model at the provided chain of element paths.

        It assumes that a chain of element paths starts at the cur_path_index with the first path ending
        with a wildcard (*)

        It returns the index where the chain of path ends.

        For example, element paths could have a list of paths as below for a `TargetDefinition` model where
        the first path is the start of the chain.

        For each of the sub model described by the first element path (e.g target-defintion.targets.*) in the chain,
        the subsequent paths (e.g. target.target-control-implementations.*) will be applied recursively to retrieve
        the sub-sub models:
        [
            'target-definition.targets.*',
            'target.target-control-implementations.*'
        ]
        for a command like below:
           trestle split -f target.yaml -e target-definition.targets.*.target-control-implementations.*
        """
        # assume we ran the command below:
        # trestle split -f target.yaml -e target-definition.targets.*.target-control-implementations.*

        if split_plan is None:
            raise TrestleError('Split plan must have been initialized')

        if cur_path_index < 0:
            raise TrestleError(
                'Current index of the chain of paths cannot be less than 0')

        # if there are no more element_paths, return the current plan
        if cur_path_index >= len(element_paths):
            return cur_path_index

        # initialize local variables
        element = Element(model_obj)
        stripped_field_alias = []

        # get the sub_model specified by the element_path of this round
        element_path = element_paths[cur_path_index]
        is_parent = cur_path_index + 1 < len(element_paths) and element_paths[
            cur_path_index + 1].get_parent() == element_path

        # root dir name for sub models dir
        # 00000__group.json will have the root_dir name as 00000__group for sub models of group
        # catalog.json will have the root_dir name as catalog sub models
        root_dir = ''
        if root_file_name != '':
            root_dir = pathlib.Path(root_file_name).stem

        # check that the path is not multiple level deep
        path_parts = element_path.get()
        if path_parts[-1] == ElementPath.WILDCARD:
            path_parts = path_parts[:-1]

        if len(path_parts) > 2:
            msg = 'Trestle supports split of first level children only, '
            msg += f'found path "{element_path}" with level = {len(path_parts)}'
            raise TrestleError(msg)

        sub_models = element.get_at(
            element_path,
            False)  # we call sub_models as in plural, but it can be just one
        if sub_models is None:
            return cur_path_index

        # assume cur_path_index is the end of the chain
        # value of this variable may change during recursive split of the sub-models below
        path_chain_end = cur_path_index

        # if wildcard is present in the element_path and the next path in the chain has current path as the parent,
        # we need to split recursively and create separate file for each sub item
        # for example, in the first round we get the `targets` using the path `target-definition.targets.*`
        # so, now we need to split each of the target recursively. Note that target is an instance of dict
        # However, there can be other sub_model, which is of type list
        if is_parent and element_path.get_last() is not ElementPath.WILDCARD:
            # create dir for all sub model items
            sub_models_dir = base_dir / element_path.to_root_path()
            sub_model_plan = Plan()
            path_chain_end = cls.split_model_at_path_chain(
                sub_models, element_paths, sub_models_dir, content_type,
                cur_path_index + 1, sub_model_plan, True)
            sub_model_actions = sub_model_plan.get_actions()
            split_plan.add_actions(sub_model_actions)
        elif element_path.get_last() == ElementPath.WILDCARD:
            # extract sub-models into a dict with appropriate prefix
            sub_model_items: Dict[str, OscalBaseModel] = {}
            sub_models_dir = base_dir / element_path.to_file_path(
                root_dir=root_dir)
            if isinstance(sub_models, list):
                for i, sub_model_item in enumerate(sub_models):
                    # e.g. `groups/00000_groups/`
                    prefix = str(i).zfill(const.FILE_DIGIT_PREFIX_LENGTH)
                    sub_model_items[prefix] = sub_model_item
            elif isinstance(sub_models, dict):
                # prefix is the key of the dict
                sub_model_items = sub_models
            else:
                # unexpected sub model type for multi-level split with wildcard
                raise TrestleError(
                    f'Sub element at {element_path} is not of type list or dict for further split'
                )

            # process list sub model items
            for key in sub_model_items:
                prefix = key
                sub_model_item = sub_model_items[key]

                # recursively split the sub-model if there are more element paths to traverse
                # e.g. split target.target-control-implementations.*
                require_recursive_split = cur_path_index + 1 < len(
                    element_paths) and element_paths[
                        cur_path_index + 1].get_parent() == element_path

                if require_recursive_split:
                    # prepare individual directory for each sub-model
                    # e.g. `targets/<UUID>__target/`
                    sub_root_file_name = cmd_utils.to_model_file_name(
                        sub_model_item, prefix, content_type)
                    sub_model_plan = Plan()

                    path_chain_end = cls.split_model_at_path_chain(
                        sub_model_item, element_paths, sub_models_dir,
                        content_type, cur_path_index + 1, sub_model_plan, True,
                        sub_root_file_name)
                    sub_model_actions = sub_model_plan.get_actions()
                else:
                    sub_model_actions = cls.prepare_sub_model_split_actions(
                        sub_model_item, sub_models_dir, prefix, content_type)

                split_plan.add_actions(sub_model_actions)
        else:
            # the chain of path ends at the current index.
            # so no recursive call. Let's just write the sub model to the file and get out
            sub_model_file = base_dir / element_path.to_file_path(
                content_type, root_dir=root_dir)
            split_plan.add_action(CreatePathAction(sub_model_file))
            split_plan.add_action(
                WriteFileAction(
                    sub_model_file,
                    Element(sub_models, element_path.get_element_name()),
                    content_type))

        # Strip the root model and add a WriteAction for the updated model object in the plan
        if strip_root:
            stripped_field_alias.append(element_path.get_element_name())
            stripped_root = model_obj.stripped_instance(
                stripped_fields_aliases=stripped_field_alias)
            if root_file_name != '':
                root_file = base_dir / root_file_name
            else:
                root_file = base_dir / element_path.to_root_path(content_type)

            split_plan.add_action(CreatePathAction(root_file))
            wrapper_alias = utils.classname_to_alias(
                stripped_root.__class__.__name__, 'json')
            split_plan.add_action(
                WriteFileAction(root_file, Element(stripped_root,
                                                   wrapper_alias),
                                content_type))

        # return the end of the current path chain
        return path_chain_end
Exemple #4
0
    def split_model_at_path_chain(
        cls,
        model_obj: OscalBaseModel,
        element_paths: List[ElementPath],
        base_dir: pathlib.Path,
        content_type: FileContentType,
        cur_path_index: int,
        split_plan: Plan,
        strip_root: bool,
        root_file_name: str,
        aliases_to_strip: Dict[str, AliasTracker],
        last_one: bool = True
    ) -> int:
        """Recursively split the model at the provided chain of element paths.

        It assumes that a chain of element paths starts at the cur_path_index with the first path ending
        with a wildcard (*)

        If the wildcard follows an element that is inherently a list of items, the list of items is extracted.
        But if the wildcard follows a generic model than members of that model class found in the model will be
        split off.  But only the non-trivial elements are removed, i.e. not str, int, datetime, etc.

        Args:
            model_obj: The OscalBaseModel to be split
            element_paths: The List[ElementPath] of elements to split, including embedded wildcards
            base_dir: pathlib.Path of the file being split
            content_type: json or yaml files
            cur_path_index: Index into the list of element paths for the current split operation
            split_plan: The accumulated plan of actions needed to perform the split
            strip_root: Whether to strip elements from the root object
            root_file_name: Filename of root file that gets split into a list of items
            aliases_to_strip: AliasTracker previously loaded with aliases that need to be split from each element
            last_one: bool indicating last item in array has been split and stripped model can now be written

        Returns:
            int representing the index where the chain of the path ends.

        Examples:
            For example, element paths could have a list of paths as below for a `ComponentDefinition` model where
            the first path is the start of the chain.

            For each of the sub model described by the first element path (e.g component-defintion.components.*) in the
            chain, the subsequent paths (e.g component.control-implementations.*) will be applied recursively
            to retrieve the sub-sub models:
            [
                'component-definition.component.*',
                'component.control-implementations.*'
            ]
            for a command like below:
            trestle split -f component.yaml -e component-definition.components.*.control-implementations.*
        """
        if split_plan is None:
            raise TrestleError('Split plan must have been initialized')

        if cur_path_index < 0:
            raise TrestleError('Current index of the chain of paths cannot be less than 0')

        # if there are no more element_paths, return the current plan
        if cur_path_index >= len(element_paths):
            return cur_path_index

        # initialize local variables
        element = Element(model_obj)
        stripped_field_alias: List[str] = []

        # get the sub_model specified by the element_path of this round
        element_path = element_paths[cur_path_index]

        # does the next element_path point back at me
        is_parent = cur_path_index + 1 < len(element_paths) and element_paths[cur_path_index
                                                                              + 1].get_parent() == element_path

        # root dir name for sub models dir
        # 00000__group.json will have the root_dir name as 00000__group for sub models of group
        # catalog.json will have the root_dir name as catalog
        root_dir = ''
        if root_file_name != '':
            root_dir = str(pathlib.Path(root_file_name).with_suffix(''))

        sub_models = element.get_at(element_path, False)  # we call sub_models as in plural, but it can be just one

        # assume cur_path_index is the end of the chain
        # value of this variable may change during recursive split of the sub-models below
        path_chain_end = cur_path_index

        # if wildcard is present in the element_path and the next path in the chain has current path as the parent,
        # Then deal with case of list, or split of arbitrary oscalbasemodel
        if is_parent and element_path.get_last() is not ElementPath.WILDCARD:
            # create dir for all sub model items
            sub_models_dir = base_dir / element_path.to_root_path()
            sub_model_plan = Plan()
            path_chain_end = cls.split_model_at_path_chain(
                sub_models,
                element_paths,
                sub_models_dir,
                content_type,
                cur_path_index + 1,
                sub_model_plan,
                True,
                '',
                aliases_to_strip
            )
            sub_model_actions = sub_model_plan.get_actions()
            split_plan.add_actions(sub_model_actions)
        elif element_path.get_last() == ElementPath.WILDCARD:
            # extract sub-models into a dict with appropriate prefix
            sub_model_items: Dict[str, OscalBaseModel] = {}
            sub_models_dir = base_dir / element_path.to_file_path(root_dir=root_dir)
            if isinstance(sub_models, list):
                for i, sub_model_item in enumerate(sub_models):
                    # e.g. `groups/00000_groups/`
                    prefix = str(i).zfill(const.FILE_DIGIT_PREFIX_LENGTH)
                    sub_model_items[prefix] = sub_model_item

            # process list sub model items
            count = 0
            for key, sub_model_item in sub_model_items.items():
                count += 1
                # recursively split the sub-model if there are more element paths to traverse
                # e.g. split component.control-implementations.*
                require_recursive_split = cur_path_index + 1 < len(element_paths) and element_paths[
                    cur_path_index + 1].get_parent() == element_path

                if require_recursive_split:
                    # prepare individual directory for each sub-model
                    sub_root_file_name = cmd_utils.to_model_file_name(sub_model_item, key, content_type)
                    sub_model_plan = Plan()

                    last_one: bool = count == len(sub_model_items)
                    path_chain_end = cls.split_model_at_path_chain(
                        sub_model_item,
                        element_paths,
                        sub_models_dir,
                        content_type,
                        cur_path_index + 1,
                        sub_model_plan,
                        True,
                        sub_root_file_name,
                        aliases_to_strip,
                        last_one
                    )
                    sub_model_actions = sub_model_plan.get_actions()
                else:
                    sub_model_actions = cls.prepare_sub_model_split_actions(
                        sub_model_item, sub_models_dir, key, content_type
                    )

                split_plan.add_actions(sub_model_actions)
        else:
            # the chain of path ends at the current index.
            # so no recursive call. Let's just write the sub model to the file and get out
            if sub_models is not None:
                sub_model_file = base_dir / element_path.to_file_path(content_type, root_dir=root_dir)
                split_plan.add_action(CreatePathAction(sub_model_file))
                split_plan.add_action(
                    WriteFileAction(sub_model_file, Element(sub_models, element_path.get_element_name()), content_type)
                )

        # Strip the root model and add a WriteAction for the updated model object in the plan
        if strip_root:
            full_path = element_path.get_full()
            path = '.'.join(full_path.split('.')[:-1])
            aliases = [element_path.get_element_name()]
            need_to_write = True
            use_alias_dict = aliases_to_strip is not None and path in aliases_to_strip
            if use_alias_dict:
                aliases = aliases_to_strip[path].get_aliases()
                need_to_write = aliases_to_strip[path].needs_writing()

            stripped_model = model_obj.stripped_instance(stripped_fields_aliases=aliases)
            # can mark it written even if it doesn't need writing since it is empty
            # but if an array only mark it written if it's the last one
            if last_one and use_alias_dict:
                aliases_to_strip[path].mark_written()
            # If it's an empty model after stripping the fields, don't create path and don't write
            field_list = [x for x in model_obj.__fields__.keys() if model_obj.__fields__[x] is not None]
            if set(field_list) == set(stripped_field_alias):
                return path_chain_end

            if need_to_write:
                if root_file_name != '':
                    root_file = base_dir / root_file_name
                else:
                    root_file = base_dir / element_path.to_root_path(content_type)

                split_plan.add_action(CreatePathAction(root_file))
                wrapper_alias = classname_to_alias(stripped_model.__class__.__name__, AliasMode.JSON)
                split_plan.add_action(WriteFileAction(root_file, Element(stripped_model, wrapper_alias), content_type))

        # return the end of the current path chain
        return path_chain_end