Example #1
    def persistManifest(self, parent_trace, manifest_dict):
        '''
        Persists manifest_dict as a yaml object and returns a ManifestHandle that uniquely identifies it.
        '''
        kind = manifest_dict['kind']
        suffix = ''

        version = ManifestUtils().get_manifest_version(parent_trace,
                                                       manifest_dict)
        if version is not None and len(str(version).strip()) > 0:
            suffix = '.' + str(version)
        manifest_file = self.test_case_name + "." + kind + suffix + ".yaml"
        my_trace = parent_trace.doing("Persisting manifest",
                                      data={
                                          'manifests_dir':
                                          self.output_manifests_dir,
                                          'manifest_file': manifest_file
                                      },
                                      origination={
                                          'concrete class':
                                          str(self.__class__.__name__),
                                          'signaled_from':
                                          __file__
                                      })
        YAML_Utils().save(my_trace,
                          data_dict=manifest_dict,
                          path=self.output_manifests_dir + '/' + manifest_file)
        handle = ManifestUtils().inferHandle(my_trace, manifest_dict)
        return handle
    def _prepare_yaml_comparison(self, parent_trace, output_dict, test_output_name, 
                                    output_data_dir, expected_data_dir, save_output_dict=False):
        '''
        Helper method that does most of the heavy lifting when we seek to compare yaml-based
        expected output.

        On success, this returns a dictionary corresponding to the expected output, that the caller can then compare
        with the parameter `output_dict`
        
        The motivation for the existence of this method is that there are different ways of comparing yaml
        files. For example:
        * A "pure" comparison, done by method `_compare_to_expected_yaml`
        * A "tolerance-based" comparison, done by method `_compare_yaml_within_tolerance`. This is used, for 
          example, to validate expected output where the contents of file systems are displayed. In such cases,
          generated Excel files may display a size that differs by 1 or 2 bytes because of the non-determinism involved
          in creating Excel files: "xlsx" files are really zip files with XML contents, and it is well
          known that zip files are created non-deterministically (for example, see 
          https://medium.com/@pat_wilson/building-deterministic-zip-files-with-built-in-commands-741275116a19).

          Sometimes simply running Apodeixi in a different deployment or machine will cause generated Excel
          files to change in size by 19 bytes or more. 

          So the tolerance level can be configured in the test_config.yaml, a file that is located under the
          root folder for the testing database that you have configured to use in ApodeixiConfig (i.e., 
          the folder above the knowledge base's root folder configured in ApodeixiConfig.)
        '''
        # Check output_dict is not None, or else the rest of this method would "gracefully do nothing" and give the
        # false impression that the test passes (at least it would erroneously pass when the expected output is an empty file)
        self.assertIsNotNone(output_dict)

        PathUtils().create_path_if_needed(parent_trace = parent_trace, path = output_data_dir)

        # Persist output (based on save_output_dict flag)
        if save_output_dict:
            YAML_Utils().save(  parent_trace, 
                                data_dict       = output_dict, 
                                path            = output_data_dir + '/' + test_output_name + '_OUTPUT.yaml')

        # Retrieve expected output
        expected_dict               = YAML_Utils().load(parent_trace, 
                                                        path = expected_data_dir + '/' + test_output_name + '_EXPECTED.yaml')

        return expected_dict
    def _compare_to_expected_yaml(self, parent_trace, output_dict, test_output_name, 
                                    output_data_dir, expected_data_dir, save_output_dict=False):
        '''
        Utility method for derived classes that create YAML files and need to check they match an expected output
        previously saved as a YAML file as well. 

        When `save_output_dict=True`, it also saves the output as a yaml file, which can be copied to become the
        expected output when the test case is first created.
        
        @param output_data_dir Directory to which to save any output.
        @param expected_data_dir Directory from which to retrieve any previously saved expected output.
        '''
        expected_dict               = self._prepare_yaml_comparison(    parent_trace        = parent_trace,
                                                                        output_dict         = output_dict, 
                                                                        test_output_name    = test_output_name, 
                                                                        output_data_dir     = output_data_dir, 
                                                                        expected_data_dir   = expected_data_dir, 
                                                                        save_output_dict    = save_output_dict)
        result_yaml                 = YAML_Utils().dict_to_yaml_string(parent_trace, data_dict = output_dict)

        expected_yaml               = YAML_Utils().dict_to_yaml_string(parent_trace, data_dict = expected_dict)

        self.assertEqual(result_yaml, expected_yaml)
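A minimal usage sketch (the class, helper methods, and attributes below are hypothetical, not taken from the code above) of how a derived test case might call `_compare_to_expected_yaml` to validate a generated manifest against previously saved expected output:

class MyYamlRegressionTest(SomeTestSkeleton):                      # hypothetical derived test class
    def test_manifest_generation(self):
        root_trace  = self._build_root_trace()                     # hypothetical helper returning a FunctionalTrace
        output_dict = self._run_code_under_test(root_trace)        # hypothetical code that produces a manifest dict

        # Compares against <expected_data_dir>/manifest_generation_EXPECTED.yaml and, because
        # save_output_dict=True, also persists <output_data_dir>/manifest_generation_OUTPUT.yaml for inspection
        self._compare_to_expected_yaml(root_trace,
                                       output_dict,
                                       test_output_name   = "manifest_generation",
                                       output_data_dir    = self.output_data,      # assumed attribute
                                       expected_data_dir  = self.expected_data,    # assumed attribute
                                       save_output_dict   = True)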
Example #4
    def _getMatchingManifests(self, parent_trace, folder, manifest_handle):
        '''
        Returns two lists of the same length:

        * A list of dictionaries, one per manifest that matches the given manifest handle
        * A list of filenames, which is where each of those manifests was retrieved from

        The search is done over the space of objects in the store that lie "at or below the folder", where
        the notion of "folder" depends on the concrete store class. For filesystem-based stores, "folder" would
        literally be a directory of some filesystem mount.

        @param folder A string scoping a subset of the store
        @param manifest_handle A ManifestHandle instance that (should) uniquely identify a single manifest in the store
        '''
        matching_manifests = []  # List of dictionaries, one per manifest
        matching_filenames = []  # List of filename strings, lined up 1-1 with matching_manifests

        # Search the given folder for manifests whose inferred handle matches the one we are looking for
        for filename in self._getFilenames(parent_trace, folder):
            my_trace = parent_trace.doing("Loading manifest from file",
                                          data={
                                              'filename': filename,
                                              'folder': folder
                                          },
                                          origination={
                                              'concrete class':
                                              str(self.__class__.__name__),
                                              'signaled_from':
                                              __file__
                                          })
            manifest_dict = YAML_Utils().load(my_trace,
                                              path=folder + '/' + filename)
            inferred_handle = ManifestUtils().inferHandle(
                my_trace, manifest_dict)
            if inferred_handle == manifest_handle:
                matching_filenames.append(filename)
                matching_manifests.append(manifest_dict)

        return matching_manifests, matching_filenames
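A hedged sketch of how a caller within a filesystem-based store might use `_getMatchingManifests`; the variable names and error handling below are illustrative, not taken from the code above:

matching_manifests, matching_filenames = store_impl._getMatchingManifests(   # store_impl: assumed store instance
                                                parent_trace    = root_trace,
                                                folder          = manifests_folder,   # assumed folder string
                                                manifest_handle = handle)             # assumed ManifestHandle
if len(matching_manifests) == 0:
    manifest_dict, manifest_path = None, None            # nothing in the store matched the handle
elif len(matching_manifests) == 1:
    manifest_dict = matching_manifests[0]
    manifest_path = manifests_folder + '/' + matching_filenames[0]
else:
    # A handle should identify at most one manifest, so multiple matches suggest a corrupted store
    raise RuntimeError("Handle matched " + str(len(matching_manifests)) + " manifests; expected at most 1")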
Example #5
    def save_environment_metadata(self, parent_trace):
        '''
        Creates and saves a YAML file called "METADATA.yaml" in the root folder for self.
        The file contains sufficient information to re-create the environment (for example, when the environment
        was created in a different Python process, this Python process would not have an in-memory object
        for it unless it loads one, leveraging the "METADATA.yaml" file).

        This can happen when the CLI creates a sandbox that will later be used by subsequent commands.
        Since each CLI invocation is its own Python process, different invocations can only share the
        same sandbox environment if there is a way to persist and then load the state of an environment.
        '''
        ME = File_KBEnv_Impl
        METADATA_FILENAME = "METADATA.yaml"
        metadata_dict = {}
        metadata_dict['name'] = self.name(parent_trace)
        metadata_dict['parent'] = self.parent(parent_trace).name(parent_trace)
        metadata_dict['postingsURL'] = self.postingsURL(parent_trace)
        metadata_dict['manifestsURL'] = self.manifestsURL(parent_trace)
        metadata_dict['clientURL'] = self.clientURL(parent_trace)

        config = self.config(parent_trace)

        config_dict = {}
        config_dict['read_misses_policy'] = config.read_misses_policy
        config_dict['use_timestamps'] = config.use_timestamps

        metadata_dict['config'] = config_dict

        if self == self._store.base_environment(parent_trace):
            environment_dir = _os.path.dirname(
                self._store.base_environment(parent_trace).manifestsURL(parent_trace))
        else:
            root_dir = _os.path.dirname(
                self._store.base_environment(parent_trace).manifestsURL(parent_trace))
            envs_dir = root_dir + "/" + ME.ENVS_FOLDER
            environment_dir = envs_dir + "/" + self.name(parent_trace)

        PathUtils().create_path_if_needed(parent_trace, environment_dir)

        YAML_Utils().save(parent_trace,
                          data_dict=metadata_dict,
                          path=environment_dir + "/" + METADATA_FILENAME)
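For illustration only, this is roughly the shape of the dictionary that `save_environment_metadata` persists to METADATA.yaml; the keys mirror the code above, while all values are made-up examples:

example_metadata_dict = {
    'name':         '210902.143501_sandbox',                                # hypothetical environment name
    'parent':       'BASE_ENVIRONMENT',                                     # hypothetical parent environment name
    'postingsURL':  '<...>/envs/210902.143501_sandbox/excel-postings',      # illustrative path
    'manifestsURL': '<...>/envs/210902.143501_sandbox/manifests',           # illustrative path
    'clientURL':    '<...>/envs/210902.143501_sandbox/collaboration_area',  # illustrative path
    'config': {
        'read_misses_policy': '<value of KB_Environment_Config.read_misses_policy>',
        'use_timestamps':     True,
    },
}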
Example #6
    def yaml_2_df(self, parent_trace, manifests_folder, manifests_file,
                  contents_path, sparse, abbreviate_uids):
        '''
        Loads a YAML file for an Apodeixi manifest, and returns a Pandas Dataframe for the data contents
        and a dictionary for all other fields
        
        @param sparse A boolean. If True, it returns a "sparse" representation suitable for Excel rendering,
                    with exactly 1 UID per row (helpful when making joins). 

                    If on the other hand sparse=False then a "full" representation is returned, more suitable
                    for data analysis in Pandas. 

                    For examples and details, refer to the documentation for `self.dict_2_df`

        @param contents_path A string using 'dot notation' to convey a path in a dictionary. For example,
                             for a dictionary  like this:
        .. code::
            
            {a: 
                {b: 5, c: 6, streams: {W1: {UID: S1.W1, cost: 4, name: 'requirements gathering'}, 
                                       W2: {UID: S1.W2, cost: 5, name: 'design'}},
                 g: 23
                }
            }
            
        then if contents_path=`a.streams` that denotes the sub-tree 
        
        .. code::

            {W1: {UID: S1.W1, cost: 4, name: 'requirements gathering'}, 
             W2: {UID: S1.W2, cost: 5, name: 'design'}}
 
        which will be turned into DataFrame like

        .. code::

            df =    | UID   |     streams               | cost  |
                    ---------------------------------------------
                    | S1.W1 | requirements gathering    |   4   |
                    | S1.W2 | design                    |   5   |
        
        The function also computes the remaining subtree, which in this example is:

        .. code::
            
            {a: 
                {b: 5, c: 6, 
                 g: 23
                }
            }
        
        The return value is the tuple `(df, subtree)`

        @param abbreviate_uids A boolean. If True, UIDs will only keep the top acronym. For example, 
                    a UID like "BR2.MR2.SM4" in the manifest would be transformed to "BR2.2.4" in the
                    DataFrame returned by this method
        '''
        manifest_path = manifests_folder + '/' + manifests_file
        my_trace = parent_trace.doing('Loading YAML Manifest',
                                      data={'path': manifest_path})
        manifest_dict = YAML_Utils().load(my_trace, path=manifest_path)
        path_tokens = contents_path.split('.')

        # Create the dictionary of everything except what is in the path
        my_trace = parent_trace.doing('Splitting manifest',
                                      data={'path_tokens': path_tokens})
        content_dict, non_content_dict = self._split_out(
            my_trace, manifest_dict, path_tokens)

        df, uid_info_list = self.dict_2_df(parent_trace, content_dict,
                                           contents_path, sparse,
                                           abbreviate_uids)
        return df, non_content_dict
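A hypothetical invocation of `yaml_2_df`, reusing the dictionary from the docstring above; `renderer` stands for whatever object exposes this method, and the folder/file names are made up:

df, non_content_dict = renderer.yaml_2_df(root_trace,
                                          manifests_folder = "/tmp/manifests",                # illustrative
                                          manifests_file   = "my_project.workstream.1.yaml",  # illustrative
                                          contents_path    = "a.streams",
                                          sparse           = False,
                                          abbreviate_uids  = True)
# Per the docstring above, df would contain one row per UID (S1.W1 and S1.W2), and
# non_content_dict would hold the remaining subtree: {a: {b: 5, c: 6, g: 23}}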
Example #7
    def find_child_environment_from_metadata(self, parent_trace,
                                             child_env_name):
        '''
        Attempts to instantiate an immediate child environment based on its metadata.
        It must be an immediate child of self.

        This method deliberately only looks for an immediate child.
        
        However, before attempting to instantiate such an environment it checks if an environment
        with that name already exists in memory. If so, it defers to the in-memory object and does
        not load the metadata, returning the in-memory object instead.

        If no metadata for such an environment exists, or if it does but is not for an immediate
        child of self, then this method will return None.

        NOTE: It is important to return None (as opposed to raising an ApodeixiError) because this
        method will likely be called in a recursive search by self.findSubEnvironment, and it is
        "normal" in such a recursion to attempt calling this method from different "self" objects in the
        environment hierarchy. For all but one of them, the child_env_name is not for a real
        child, so it is OK to return None. It would be erroneous to raise an ApodeixiError, since that
        would cause the recursion of self.findSubEnvironment to abort before it has a chance to reach
        the real parent of `child_env_name`.

        '''
        # First, determine if the environment exists in memory
        child_env = self.findSubEnvironmentInMemory(parent_trace,
                                                    child_env_name)
        if child_env is not None:
            return child_env

        ME = File_KBEnv_Impl

        my_trace = parent_trace.doing(
            "Retrieving metadata for child environment",
            data={
                'child_env_name': child_env_name,
            })
        METADATA_FILENAME = "METADATA.yaml"
        root_dir = _os.path.dirname(
            self._store.base_environment(parent_trace).manifestsURL(
                parent_trace))
        envs_dir = root_dir + "/" + ME.ENVS_FOLDER
        environment_dir = envs_dir + "/" + child_env_name

        metadata_path = environment_dir + "/" + METADATA_FILENAME

        if not _os.path.exists(metadata_path):
            return None

        metadata_dict = YAML_Utils().load(my_trace, path=metadata_path)
        if self.name(my_trace) != metadata_dict['parent']:
            return None

        my_trace = parent_trace.doing(
            "Instantiating child environment from metadata",
            data={
                'child_env_name': child_env_name,
                'metadata': metadata_dict
            })

        config_dict = metadata_dict["config"]
        child_env_config = KB_Environment_Config(
            parent_trace=my_trace,
            read_misses_policy=config_dict["read_misses_policy"],
            use_timestamps=config_dict["use_timestamps"],
            path_mask=None,  # This was not persisted
        )

        # GOTCHA: When constructing the child_env_impl, we must give a parent_environment that is
        # of type KB_Environment.
        #
        # However, here we are an implementation class, so not derived from KB_Environment, so will need
        # first to find our wrapping KB_Environment object for which self is the impl
        base_environment = self._store.base_environment(my_trace)
        our_name = self.name(my_trace)
        if our_name == base_environment.name(my_trace):
            our_env = base_environment
        else:
            our_env = base_environment.findSubEnvironment(
                parent_trace=my_trace, name=our_name)

        child_env_impl = File_KBEnv_Impl(
            parent_trace=my_trace,
            name=child_env_name,
            store=self._store,
            parent_environment=our_env,
            config=child_env_config,
            postings_rootdir=metadata_dict["postingsURL"],
            manifests_roodir=metadata_dict["manifestsURL"],
            clientURL=metadata_dict["clientURL"])

        child_env = KB_Environment(parent_trace=my_trace, impl=child_env_impl)
        self._children[child_env_name] = child_env
        return child_env
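A hedged sketch of how a store might use `find_child_environment_from_metadata` to resolve a sandbox created by an earlier CLI invocation; the instance and sandbox names below are assumptions:

child_env = current_env_impl.find_child_environment_from_metadata(     # current_env_impl: assumed File_KBEnv_Impl
                                    root_trace,
                                    child_env_name = "210902.143501_sandbox")   # illustrative sandbox name
if child_env is None:
    # Either no METADATA.yaml exists for that name, or it belongs to a different parent, so a
    # recursive search (e.g., findSubEnvironment) would simply continue with other candidates
    pass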
Example #8
    def overwrite_test_context(self, parent_trace):
        '''
        This is a "trick" method needed so that CLI invocations run in the environment isolated for this test case (or
        its children), as opposed to on the base environment.

        It accomplishes this by "fooling" the CLI into thinking that "base environment" is actually the environment
        isolated for this test case.

        It does so by overwriting the value of the self.CONFIG_DIRECTORY() environment variable.
        What is tricky is:

        * By the time this method is called, this class no longer needs the self.CONFIG_DIRECTORY() environment
          variable, since it was used in super().setUp() to initialize self.a6i_config and other properties, and
          that is as it should be. 

        * Therefore, the modification in this method to self.CONFIG_DIRECTORY() is not going to impact this
          test object. Instead, it will impact other objects that use it. There is no such object in Apodeixi itself,
          but there is one in the CLI: the KB_Session class.

        * The intent is then for the KB_Session class to initialize its notion of self.a6i_config differently, so
          that it is "fooled" into thinking that the "base environment" is this test case's isolated environment.

        * Each time the CLI is invoked, it constructs a KB_Session to initialize the KnowledgeBaseStore. Thus
          the CLI will be using a store pointing to this test case's isolated environment. This is different from
          non-CLI tests, for which the store points to the test knowledge base common to the Apodeixi test suite.
        '''
        # Before changing context, create the environment for this test, which will later become the
        # "fake base environment" when we switch context. But this uses the "original" store, so must be done
        # before we switch context, so we must select the stack here (and later we re-select it when
        # switching context)
        self.selectStack(parent_trace)
        self.provisionIsolatedEnvironment(parent_trace)

        # Remember original config before it is overwritten when we change context
        original_a6i_config = self.a6i_config

        # In case it is ever needed, remember this tests suite's value for the environment variable
        self.config_directory_for_this_test_object = _os.environ.get(
            self.CONFIG_DIRECTORY())

        # OK, we start the context switch here.
        # For this test case, we want the CLI to use a config file that is in the input folder
        _os.environ[self.CONFIG_DIRECTORY()] = self.input_data + "/" + self.scenario()

        # Each CLI test has a dedicated folder containing the test environment for it, i.e., an entire
        # test database (knowledge base folder, collaboration area folder) just for that test.
        # These folders are referenced in a test-specific apodeixi_config.toml.
        # In order to make the folders thus referenced not depend on the installation folder for the Apodeixi
        # test database, we introduce the environment variable ${TEST_DB_DIR}, which should be used in all these
        # CLI-test-specific apodeixi_config.toml files.
        #
        # Example: for CLI test for subproducts with id #1011, in the folder ...../test_db/input_data/1011/cli.subproducts,
        #   the apodeixi_config.toml should have a line like
        #       knowledge-base-root-folder = "${TEST_DB_DIR}/knowledge-base/envs/1011_ENV/kb"
        #   instead of
        #       knowledge-base-root-folder = "C:/Users/aleja/Documents/Code/chateauclaudia-labs/apodeixi/test_db/knowledge-base/envs/1011_ENV/kb"
        #
        #  Such practice ensures that the test harness continues to work no matter where it is installed (for example,
        # in a Docker container).
        # To make this approach work, we hereby set that environment variable, whose value will be consulted by the
        # ApodeixiConfig constructor when we do the context switch a few lines further below
        _os.environ[self.TEST_DB_DIR] = self.test_db_dir

        # Now overwrite parent's notion of self.a6i_config and of the self.test_config_dict
        self.a6i_config = ApodeixiConfig(parent_trace)
        # Re-create the store for this test with the "fake" base environment
        self.selectStack(parent_trace)

        # Set again the location of the test directory as per the original a6i config. We need it to mask non-deterministic
        # paths
        self.a6i_config.test_db_dir = original_a6i_config.test_db_dir

        # Next time an environment is provisioned for this test, use this overwritten config for the name of the folder
        self.test_config_dict = YAML_Utils().load(
            parent_trace,
            path=self.input_data + "/" + self.scenario() + '/test_config.yaml')
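To show where `overwrite_test_context` fits, here is a minimal sketch of a CLI test case's setUp; the class name and base class are hypothetical, not taken from the code above:

class MyCLITest(SomeCLITestSkeleton):                 # hypothetical CLI test skeleton class
    def setUp(self):
        super().setUp()
        root_trace = FunctionalTrace(parent_trace=None, path_mask=self._path_mask).doing(
                        "Switching context so CLI invocations use this test's isolated environment",
                        origination={'signaled_from': __file__})
        # After this call, any KB_Session constructed by a CLI invocation will read the test-specific
        # apodeixi_config.toml and treat this test case's isolated environment as the "base environment"
        self.overwrite_test_context(root_trace)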
Example #9
    def setUp(self):
        super().setUp()

        self.input_data = None  # Will be set later by method selectTestDataLocation
        self.results_data = None  # Will be set later by method selectTestDataLocation

        # Integration test cases must call self.selectTestDataLocation(-) to set self.results_data
        # and self.input_data.
        # This is required for 2 reasons:
        #   1. Ease of management - all integration tests are registered in test_config.yaml and
        #       their output is externalized from the Apodeixi code base
        #   2. Reduce the size of folder structure by placing output in a less nested directory structure
        #       (i.e., not under the code of the tests themselves). This is needed in Windows to avoid
        #       issues with long paths that impede file persistence and/or impede committing to GIT.
        #       As an added benefit, I noticed it improves test performance by 50% to use shorter paths.
        #
        # To support all this we have these two attributes that get used later in self.selectTestDataLocation(-):
        #   - self.test_db_dir
        #   - self.test_config_dict
        #
        root_trace = FunctionalTrace(
            parent_trace=None, path_mask=self._path_mask).doing(
                "Checking where results should be saved to",
                origination={'signaled_from': __file__})
        self.test_db_dir = _os.path.dirname(
            self.a6i_config.get_KB_RootFolder(root_trace))
        self.test_config_dict = YAML_Utils().load(root_trace,
                                                  path=self.test_db_dir +
                                                  '/test_config.yaml')

        # Remember location of test_db in ApodeixiConfig.
        # This flag will be set by test cases to assist with masking non-deterministic information about the
        # location of the test database. It is used in the masking function that hides parts of paths from regression
        # output, to avoid non-deterministic test output. When not using the test regression suite, this flag plays no role
        # in Apodeixi.
        self.a6i_config.test_db_dir = self.test_db_dir

        root_trace = FunctionalTrace(
            parent_trace=None, path_mask=self._path_mask).doing(
                "Provisioning stack for integration test",
                origination={'signaled_from': __file__})

        # These will be set by each individual test case (i.e., each method in a derived class with a name like "test_*")
        self._current_test_name = None

        # For ease of maintenance of tests, each output for a test will be named using standard numbering
        # enforced by the "next_*" functions
        self._output_nb = 0

        # As a general pattern, we only enforce referential integrity tests in "flow" tests, which is the
        # more "realistic" flavor of integration tests
        self.a6i_config.enforce_referential_integrity = False

        # Log output files like "POST_EVENT_LOG.txt" are normally masked in test output, so we want
        # them to match expected output to the byte when showing environment contents.
        # *HOWEVER*, in the case of CLI tests we don't mask their contents, both to make them "more realistic" and because
        # CLI test output doesn't show the contents of such log files.
        # So to ensure CLI tests don't frivolously fail when the test_db is relocated, this setting
        # (normally False) can be set to True by derived classes (such as CLI tests), so that the test case
        # accepts whatever byte size is displayed for log files when displaying environment contents.
        self.ignore_log_files_byte_size = False
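Finally, a hypothetical derived integration test built on the setUp above; every name not mentioned in the comments above (the class names, the test method, its body) is an assumption:

class MyIntegrationTest(ThisIntegrationTestSkeleton):      # hypothetical subclass of the class shown above
    def test_big_rocks_flow(self):
        self._current_test_name = "big_rocks_flow"          # feeds the "next_*" output numbering
        root_trace = FunctionalTrace(parent_trace=None, path_mask=self._path_mask).doing(
                        "Running big rocks flow",
                        origination={'signaled_from': __file__})
        # Per the comments in setUp, point self.input_data / self.results_data at the externalized
        # test database before exercising the knowledge base
        self.selectTestDataLocation(root_trace)              # signature assumed; see setUp comments
        ...                                                  # rest of the flow under test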