Ejemplo n.º 1
0
    def get_rollFromName(self, parent_trace, manifest_dict):
        '''
        If the manifest_dict is for a rollover situation, then it returns the manifest name we are rolling from.

        Otherwise returns None

        @param parent_trace A FunctionalTrace used for error reporting.
        @param manifest_dict A dict representing a manifest; in a rollover situation its
                            metadata.labels contains the roll-from name.
        '''
        roll_from_name = None
        dict_path = [
            "metadata",
            "labels",
            RolloverUtils.ROLL_FROM_NAME,
        ]
        # GOTCHA: valid_types must hold the type object `str`, not the string 'str',
        # to match how DictionaryUtils is invoked everywhere else in this codebase.
        check, explanation = DictionaryUtils().validate_path(
            parent_trace,
            manifest_dict,
            "Dict Name",
            dict_path,
            valid_types=[str])
        if check:
            roll_from_name = DictionaryUtils().get_val(parent_trace,
                                                       manifest_dict,
                                                       "Dict Name",
                                                       dict_path,
                                                       valid_types=[str])
        return roll_from_name
Ejemplo n.º 2
0
 def FUNC(filter_trace, manifest_dict):
     '''
     Checks whether `manifest_dict` carries a metadata label matching an equality
     expression of the form "<label>=<value>".

     NOTE(review): references `self` and `expression`, which are not parameters —
     this appears to have been extracted from a method/closure where they were in
     scope; confirm before reusing as a standalone function.
     NOTE(review): when `expression` is not an equality, the function falls through
     and implicitly returns None (falsy) — presumably intended, but verify.
     '''
     # Fetch the manifest's labels sub-dict; get_val raises if "metadata.labels"
     # is missing or is not a dict.
     labels_dict = DictionaryUtils().get_val(
         filter_trace,
         manifest_dict,
         root_dict_name="Manifest",
         path_list=["metadata", "labels"],
         valid_types=[dict])
     if self.is_equality(filter_trace, expression):
         label, val = expression.split("=")
         # Reject the manifest if the label is absent or its value differs.
         if not label in labels_dict.keys(
         ) or labels_dict[label] != val:
             return False
         else:
             return True
Ejemplo n.º 3
0
    def test_a6i_config(self):
        '''
        Regression test: loads the Apodeixi configuration and compares its flattened
        text rendering against previously saved expected output.
        '''
        try:
            root_trace = FunctionalTrace(
                parent_trace=None, path_mask=self._path_mask).doing(
                    "Testing loading for Apodeixi Config")
            config = ApodeixiConfig(root_trace)

            # To ensure deterministic output, mask the parent part of any path that is mentioned
            # in the configuration before displaying it in regression output
            #
            clean_dict = DictionaryUtils().apply_lambda(
                parent_trace=root_trace,
                root_dict=config.config_dict,
                root_dict_name="Apodeixi config",
                lambda_function=self._path_mask)

            config_txt = DictionaryFormatter().dict_2_nice(
                parent_trace=root_trace, a_dict=clean_dict, flatten=True)

            self._compare_to_expected_txt(parent_trace=root_trace,
                                          output_txt=config_txt,
                                          test_output_name='test_a6i_config',
                                          save_output_txt=True)
        except ApodeixiError as ex:
            print(ex.trace_message())
            # Fail with an explicit message rather than the opaque assertTrue(1 == 2)
            self.fail("Test aborted due to an ApodeixiError; see trace message above")
    def _retrieve_referenced_uids(self, parent_trace):
        '''
        Returns a list of UID strings that the referencing manifest (i.e., the manifest identified by 
        self.referencing_handle) has in the path given by self.referencing_path.

        If the path is not valid or if it points to something that is not a UID or list of UIDs, this method raises an
        ApodeixiError
        '''
        referencing_dict, ref_path = self.store.retrieveManifest(
            parent_trace, self.referencing_handle)
        val = DictionaryUtils().get_val(parent_trace=parent_trace,
                                        root_dict=referencing_dict,
                                        root_dict_name="Referencing Manifest",
                                        path_list=self.referencing_path,
                                        valid_types=[str, list])
        my_trace = parent_trace.doing(
            "Validating referenced UIDs are well-formed")
        # To make a uniform check, operate on lists regardless of whether val is str (1 UID) or a list (multiple UIDs)
        if isinstance(val, str):
            alleged_uids = [val]
        else:
            alleged_uids = val

        # We leverage the UID_Utils method tokenize to validate that UIDs are well formed.
        # tokenize raises an ApodeixiError on a malformed UID; its return value is not needed here.
        for au in alleged_uids:
            loop_trace = my_trace.doing("Validating that '" + str(au) +
                                        "' is a well-formed UID")
            UID_Utils().tokenize(loop_trace, au)

        # If we get this far without erroring out, the UIDs are all well-formed, so we can return them
        return alleged_uids
Ejemplo n.º 5
0
    def findController(self, parent_trace, posting_api):
        '''
        Retrieves and returns a PostingController object that knows how to process postings for objects
        belonging to the given `posting_api`.

        If the Knowledge Base or its store is not configured to support such postings, it raises an ApodeixiError.
        '''
        my_trace = parent_trace.doing(
            "Validating that KnowledgeBase supports the given posting api",
            data={'posting_api': posting_api})

        # Use the sub-trace just created above, consistent with the rest of this method
        check, explanation = DictionaryUtils().validate_path(
            parent_trace=my_trace,
            root_dict=self.controllers,
            root_dict_name='Knowledge Base supported controllers',
            path_list=[posting_api],
            valid_types=[type])

        if not check:
            raise ApodeixiError(
                my_trace,
                "Knowledge Base does not support the given posting api and kind",
                data={'posting_api': posting_api})

        my_trace = parent_trace.doing(
            "Validating that KnowledgeBase's store supports the given posting api",
            data={'posting_api': posting_api})
        store_supported_apis = self.store.supported_apis(my_trace)
        if posting_api not in store_supported_apis:
            # Fixed copy-paste bug: this message previously duplicated the controller-instantiation
            # error below, which was misleading when the store simply lacks the posting api.
            raise ApodeixiError(
                my_trace,
                "Knowledge Base's store does not support the given posting api",
                data={
                    'posting_api': str(posting_api),
                    'store_supported_apis found': str(store_supported_apis)
                })

        klass = self.controllers[posting_api]
        my_trace = parent_trace.doing(
            "Instantiating a PostingController class",
            data={
                'class': str(klass),
                'posting_api': str(posting_api)
            })

        try:
            ctrl = klass(my_trace, self.store, a6i_config=self.a6i_config)
        except Exception as ex:
            raise ApodeixiError(
                my_trace,
                "Unable to instantiate a controller from given class. Is it the right type?",
                data={
                    'controller_class': str(klass),
                    'exception found': str(ex)
                })

        return ctrl
Ejemplo n.º 6
0
    def __init__(self, parent_trace):
        '''
        Loads the Apodeixi configuration from a TOML file, optionally merging in an
        "include" file if the configuration references one (settings in the main
        configuration take precedence over settings from the include file).

        @param parent_trace A FunctionalTrace used for error reporting.
        @raises ApodeixiError if the configuration (or a referenced include file)
                            can't be read or parsed.
        '''
        CONFIG_FILE = self._get_config_filename(parent_trace)
        CONFIG_FOLDER = self._get_config_folder(parent_trace)

        my_trace = parent_trace.doing(
            "Attempting to load Apodeixi configuration",
            data={
                'CONFIG_FOLDER': CONFIG_FOLDER,
                'CONFIG_FILE': CONFIG_FILE
            })
        try:
            with open(CONFIG_FOLDER + '/' + CONFIG_FILE, 'r') as file:
                config_txt = str(file.read())
            config_dict = _toml.loads(config_txt)
        except Exception as ex:
            raise ApodeixiError(
                my_trace,
                "Was not able to retrieve Apodeixi configuration due to: " +
                str(ex))

        self.config_dict = config_dict

        my_trace = parent_trace.doing("Checking for any includes")
        check, explanation = DictionaryUtils().validate_path(
            parent_trace=my_trace,
            root_dict=self.config_dict,
            root_dict_name='apodeixi_config',
            path_list=['include', 'file'],
            valid_types=[str])
        if check:
            INCLUDE_FILE = config_dict['include']['file']
            try:
                my_trace = parent_trace.doing(
                    "Attempting to load include file referenced in Apodeixi configuration",
                    data={'include.file': INCLUDE_FILE})
                with open(CONFIG_FOLDER + '/' + INCLUDE_FILE, 'r') as file:
                    file_to_include_txt = str(file.read())
                file_to_include_dict = _toml.loads(file_to_include_txt)
            except Exception as ex:
                raise ApodeixiError(
                    my_trace,
                    "Was not able to retrieve include file referenced in Apodeixi configuration due to: "
                    + str(ex),
                    data={'include.file': INCLUDE_FILE})

            # Dict union: keys in self.config_dict (right operand) win over the include file's keys
            self.config_dict = file_to_include_dict | self.config_dict

        # Determines whether referential integrity checks are enforced. Should be True in production but
        # unit tests may choose to turn it off.
        self.enforce_referential_integrity = True

        # This flag will be set by test cases to assist with masking non-deterministic information about the
        # location of the test database. It is used in the masking function that hides parts of paths from regression
        # output, to avoid non-deterministic test output. When not using the test regression suite, this flag plays no role.
        self.test_db_dir = None
    def _compare_yaml_within_tolerance(self, parent_trace, output_dict, test_output_name, 
                                    output_data_dir, expected_data_dir, save_output_dict,
                                    tolerance_lambda):
        '''
        Asserts if the `output_dict` is within tolerance of expected output. 

        Used to avoid spurious test failures due to small deviations in expected output arising from
        non-deterministic behavior.

        A primary use case is when validating the contents of file systems (e.g., when doing snapshots of
        KnowledgeBaseStore environments). 
        
        In such cases, generated Excel files may display a size that differs by 1 or 2 bytes because of the 
        non-determinism involved in creating Excel files since "xlsx" files are really zip files with XML contents, 
        and it is well known that zip files are non-deterministically created (for example, see 
        https://medium.com/@pat_wilson/building-deterministic-zip-files-with-built-in-commands-741275116a19).
        
        To address this problem when doing regression testing, Apodeixi provides this method which allows
        a usage of a function (`tolerance_lambda`) to pass as "valid" some output that varies within a range
        considered "fine" by the `tolerance_lambda`.

        @param tolerance_lambda A function that takes three parameters and returns a boolean:

                            boolean = tolerance_lambda(key, val_output, val_expected)

                        To understand these parameters, it is helpful to remember we are ultimately
                        comparing dict-based representation of YAML content, and therefore we
                        can think of the `output_dict` and the expected dict as "trees". That is,  
                        for each key the value is either another dict that is also a "tree", or
                        some other value, typically a scalar (a "leaf" in the tree).
                        
                        The tolerance function is applied during this recursive process when comparing a given
                        leaf node in this tree. 
                        
                        A leaf in the output tree would be a pair <key, val_output>, whereas
                        a leaf in the expected tree would be another pair <key, val_expected>.

                        In this situation, tolerance_lambda(key, val_output, val_expected) returns true if
                        val_output is "within tolerance" of val_expected, as judged by the implementation of the
                        tolerance_lambda
        '''
        # Loads (and optionally saves) the expected dict corresponding to this test's output
        expected_dict               = self._prepare_yaml_comparison(    parent_trace        = parent_trace,
                                                                        output_dict         = output_dict, 
                                                                        test_output_name    = test_output_name, 
                                                                        output_data_dir     = output_data_dir, 
                                                                        expected_data_dir   = expected_data_dir, 
                                                                        save_output_dict    = save_output_dict)

        # Recursive tree comparison; `explain` describes the first difference found, if any
        check, explain = DictionaryUtils().compare(  parent_trace        = parent_trace, 
                                                    left_dict           = output_dict, 
                                                    right_dict          = expected_dict, 
                                                    tolerance_lambda    = tolerance_lambda)

        self.assertTrue(check, msg=explain)
Ejemplo n.º 8
0
    def getSecretsFolder(self, parent_trace):
        '''
        Returns the secrets' folder (a string) from the Apodeixi Configuration, with any
        environment variables in the path expanded.

        @raises ApodeixiError if the setting is missing or is not a string.
        '''
        my_trace = parent_trace.doing(
            "Retrieving the secrets' folder from the Apodeixi Configuration ")
        path_to_setting = ['secrets', 'folder']
        found, why_not = DictionaryUtils().validate_path(
            parent_trace=my_trace,
            root_dict=self.config_dict,
            root_dict_name='apodeixi_config',
            path_list=path_to_setting,
            valid_types=[str])
        if not found:
            raise ApodeixiError(my_trace,
                                "Can't locate secrets' folder: " + why_not)

        # Expand any environment variables in the path
        raw_folder = self.config_dict['secrets']['folder']
        return _os.path.expandvars(raw_folder)
Ejemplo n.º 9
0
    def getMonthFiscalYearStarts(self, parent_trace):
        '''
        Returns the month (an int) in which the Knowledge Base's fiscal year starts,
        as configured in the Apodeixi Configuration.

        @raises ApodeixiError if the setting is missing or is not an int.
        '''
        my_trace = parent_trace.doing(
            "Retrieving Knowledge Base's fiscal year start from the Apodeixi Configuration "
        )
        path_to_setting = ['organization-settings', 'month-fiscal-year-starts']
        is_valid, why_not = DictionaryUtils().validate_path(
            parent_trace=my_trace,
            root_dict=self.config_dict,
            root_dict_name='apodeixi',
            path_list=path_to_setting,
            valid_types=[int])
        if not is_valid:
            raise ApodeixiError(
                my_trace, "Can't locate Knowledge Base's fiscal year start: " +
                why_not)

        group, setting = path_to_setting
        return self.config_dict[group][setting]
Ejemplo n.º 10
0
    def getKnowledgeBaseAreas(self, parent_trace):
        '''
        Returns the list of Knowledge Base areas configured in the Apodeixi Configuration.

        @raises ApodeixiError if the setting is missing or is not a list.
        '''
        my_trace = parent_trace.doing(
            "Retrieving Knowledge Base's areas from the Apodeixi Configuration "
        )
        path_to_setting = ['organization-settings', 'knowledge-base-areas']
        is_valid, why_not = DictionaryUtils().validate_path(
            parent_trace=my_trace,
            root_dict=self.config_dict,
            root_dict_name='apodeixi',
            path_list=path_to_setting,
            valid_types=[list])
        if not is_valid:
            raise ApodeixiError(
                my_trace,
                "Can't locate Knowledge Base's areas: " + why_not)

        group, setting = path_to_setting
        return self.config_dict[group][setting]
Ejemplo n.º 11
0
    def get_CLI_InitializerClassname(self, parent_trace):
        '''
        Returns the classname (a string) of the class used to initialize the Apodeixi CLI,
        as configured in the Apodeixi Configuration.

        @raises ApodeixiError if the setting is missing or is not a string.
        '''
        my_trace = parent_trace.doing(
            "Retrieving Knowledge Base's CLI initializer classname from the Apodeixi Configuration "
        )
        CLI = 'cli'
        CLASSNAME = 'initializer-classname'
        check, explanation = DictionaryUtils().validate_path(
            parent_trace=my_trace,
            root_dict=self.config_dict,
            root_dict_name='apodeixi',
            path_list=[CLI, CLASSNAME],
            valid_types=[str])
        if not check:
            # Fixed copy-paste error: message previously said "Knowledge Base's areas"
            raise ApodeixiError(
                my_trace,
                "Can't locate Knowledge Base's CLI initializer classname: " +
                explanation)

        return self.config_dict[CLI][CLASSNAME]
Ejemplo n.º 12
0
    def get_ExternalCollaborationFolder(self, parent_trace):
        '''
        Returns the external collaboration root folder (a string) from the Apodeixi
        Configuration, with any environment variables in the path expanded.

        @raises ApodeixiError if the setting is missing or is not a string.
        '''
        my_trace = parent_trace.doing(
            "Retrieving external collaboration root folder from the " +
            "Apodeixi Configuration ")
        path_to_setting = ['knowledge-base', 'external-collaboration-folder']
        found, why_not = DictionaryUtils().validate_path(
            parent_trace=my_trace,
            root_dict=self.config_dict,
            root_dict_name='apodeixi',
            path_list=path_to_setting,
            valid_types=[str])
        if not found:
            raise ApodeixiError(
                my_trace,
                "Can't locate external collaboration folder: " + why_not)

        group, setting = path_to_setting
        # Expand any environment variables in the path
        return _os.path.expandvars(self.config_dict[group][setting])
Ejemplo n.º 13
0
    def get_KB_RootFolder(self, parent_trace):
        '''
        Returns the Knowledge Base's root folder (a string) from the Apodeixi
        Configuration, with any environment variables in the path expanded.

        @raises ApodeixiError if the setting is missing or is not a string.
        '''
        my_trace = parent_trace.doing(
            "Retrieving Knowledge Base's root folder from the Apodeixi Configuration "
        )
        path_to_setting = ['knowledge-base', 'knowledge-base-root-folder']
        found, why_not = DictionaryUtils().validate_path(
            parent_trace=my_trace,
            root_dict=self.config_dict,
            root_dict_name='apodeixi',
            path_list=path_to_setting,
            valid_types=[str])
        if not found:
            raise ApodeixiError(
                my_trace,
                "Can't locate root folder for Knowledge Base: " + why_not)

        group, setting = path_to_setting
        # Expand any environment variables in the path
        return _os.path.expandvars(self.config_dict[group][setting])
Ejemplo n.º 14
0
    def getGrandfatheredScoringCycles(self, parent_trace):
        '''
        Returns a list of strings, corresponding to manually configured scoring cycles that are considered
        valid.
        This method exists for backward compatibility reasons, to not invalidate data created before Apodeixi started
        enforcing that scoring cycles must be strings that can be successfully parsed into FY_Quarter objects.
        '''
        # Typo fixed in trace message: "scorcing" -> "scoring"
        my_trace = parent_trace.doing(
            "Retrieving grandfathered scoring cycles from the Apodeixi Configuration "
        )
        # NOTE(review): the misspelled key 'backward-compabitility' is deliberately kept, since it
        # must match the key actually used in deployed configuration files — confirm before renaming.
        BACK_COMPATIBILITY = 'backward-compabitility'
        GRANDFATHERED_SC = 'grandfathered_scoring_cycles'
        check, explanation = DictionaryUtils().validate_path(
            parent_trace=my_trace,
            root_dict=self.config_dict,
            root_dict_name='apodeixi',
            path_list=[BACK_COMPATIBILITY, GRANDFATHERED_SC],
            valid_types=[list])
        if not check:
            # If nothing is grandfathered, that is good. It means this deployment will only use modern
            # scoring cycles that can be converted to FY_Quarter objects
            return []

        return self.config_dict[BACK_COMPATIBILITY][GRANDFATHERED_SC]
Ejemplo n.º 15
0
    def test_notebook_run(self):
        '''
        Regression test: executes an input Jupyter notebook via NotebookUtils and compares
        the cleaned result dict against previously saved expected output.

        Non-deterministic content (execution timestamps, machine-specific folder paths, and
        the Python version number) is masked before the comparison.
        '''
        root_trace = FunctionalTrace(
            parent_trace=None,
            path_mask=self._path_mask).doing("Testing Notebook execution")
        try:
            INPUT_FOLDER = self.input_data
            OUTPUT_FOLDER = self.output_data
            EXPECTED_FOLDER = self.expected_data
            TEST_SCENARIO = 'test_notebook_run'

            PathUtils().create_path_if_needed(root_trace,
                                              OUTPUT_FOLDER + "/notebooks/")
            nb_utils = NotebookUtils(
                src_folder=INPUT_FOLDER,
                src_filename=TEST_SCENARIO + "_INPUT.ipynb",
                destination_folder=OUTPUT_FOLDER + "/notebooks/",
                destination_filename=TEST_SCENARIO +
                "_executed_notebook.ipynb")

            my_trace = root_trace.doing("Running notebook")
            result_dict = nb_utils.run(my_trace)

            # Remove a path with timestamps since it changes all the time
            my_trace = root_trace.doing("Removing path with timestamps")

            def _hide_timestamps(val):
                '''Replaces any cell execution timestamp with a deterministic placeholder.'''
                return '<Timestamps removed in test output>'

            cleaned_dict = DictionaryUtils().replace_path(
                parent_trace=my_trace,
                root_dict=result_dict,
                root_dict_name='nb_utils_run_result_dict',
                path_list=['cells', '*', 'metadata', 'execution', '*'],
                replacement_lambda=_hide_timestamps)
            my_trace = root_trace.doing(
                "Hiding user_folders printed as output")

            def _hide_root_folder(val):
                '''
                1) Hides root directory for paths displayed in output
                2) Converts displayed paths to Linux format, so we get same output in Windows and Linux

                @param val A string; normally the value of an entry in a dictionary
                '''
                folder_hints = [
                    'apodeixi\\util', 'apodeixi\\\\util', 'apodeixi/util'
                ]
                result = val
                for hint in folder_hints:
                    if hint in val:  # val is a path, keep only what comes after 'src/apodeixi'.
                        result = '<Root directory hidden in test output>' + hint + val.split(
                            hint)[1]
                        if _os.name == "nt":  # Display in Linux style
                            result = result.replace("\\\\", "/")
                        return result

                return result

            def _hide_version_nb(val):
                '''
                Masks Python version numbers so that output does not depend on what version of
                Python is used to run tests.

                @param val A string; normally the value of an entry in a dictionary
                '''
                # Mask Python version numbers for vals like: "    version: 3.9.7".
                # Dots are escaped so we match literal version separators, not arbitrary characters.
                VERSION_NB_REGEX = _re.compile(r'[0-9]+\.[0-9]+\.[0-9]+')
                result = _re.sub(VERSION_NB_REGEX, '<VERSION NB>', val)

                return result

            cleaned_dict = DictionaryUtils().replace_path(
                parent_trace=my_trace,
                root_dict=cleaned_dict,
                root_dict_name='aha_configurer_result_dict',
                path_list=['cells', '*', 'outputs', '*', 'data', 'text/plain'],
                replacement_lambda=_hide_root_folder)
            cleaned_dict = DictionaryUtils().replace_path(
                parent_trace=my_trace,
                root_dict=cleaned_dict,
                root_dict_name='aha_configurer_result_dict',
                path_list=['metadata', 'language_info', 'version'],
                replacement_lambda=_hide_version_nb)

            self._compare_to_expected_yaml(parent_trace=my_trace,
                                           output_dict=cleaned_dict,
                                           test_output_name=TEST_SCENARIO,
                                           save_output_dict=True)
        except ApodeixiError as ex:
            print(ex.trace_message())
            # Fail with an explicit message rather than the opaque assertTrue(1 == 2)
            self.fail("Test aborted due to an ApodeixiError; see trace message above")
Ejemplo n.º 16
0
    def manifests_description(self, parent_trace, kb_session, kinds_of_interest, labels_of_interest, environment_filter):
        '''
        Returns a nicely formatted string, suitable for CLI output. 

        It displays summary information for all manifests that the
        KnowledgeBase currently supports whose kind is one of the `kinds_of_interest` and
        which have all the labels of interest.

        @param kinds_of_interest A list of strings, corresponding to manifest kinds we seek. If null, then
            we will collect all kinds known to the system.
        @param labels_of_interest A list of strings of the form "<field>=<value>", which constrains
            which manifests are returned by forcing that each of them has <field> as a label with value <value>.
            If set to None, then all manifests are included.

        @param environment_filter A lambda function, that takes a string argument and returns True or False.
            Its purpose is to filter out which KnowledgeBase store's environments to include when searching
            for products. If it is None, then all environments are included
        '''
        description_table = []
        description_headers = ["Kind", "Version", "Estimated on", "Recorded by", "Namespace", "Name", "Environment"]

        # Search the base environment plus all sandboxes, unless a filter narrows that down
        environments = []
        environments.append(kb_session.store.base_environment(parent_trace).name(parent_trace))
        environments.extend(self._sandboxes_names_list(parent_trace, kb_session))

        if environment_filter is not None:
            environments = [e for e in environments if environment_filter(e)]

        if kinds_of_interest is None:
            kinds_of_interest = self._get_all_kinds(parent_trace, kb_session)

        # Remember the active environment so we can restore it after iterating over all environments
        original_env_name = kb_session.store.current_environment(parent_trace).name(parent_trace)
        for env_name in environments:
            kb_session.store.activate(parent_trace, env_name)

            def _manifest_filter(parent_trace, manifest_dict):
                # TODO For now we just approve everything. Later will need to filter on labels
                return True

            manifest_dict_list = kb_session.store.searchManifests(parent_trace, kinds_of_interest,
                                                                  manifest_filter=_manifest_filter)
            # Columns: ["Kind", "Version", "Estimated on", "Recorded by", "Namespace", "Name", "Environment"]
            GET = DictionaryUtils().get_val
            for m_dict in manifest_dict_list:
                kind = GET(parent_trace, m_dict, "Manifest", ["kind"], [str])
                version = GET(parent_trace, m_dict, "Manifest", ["metadata", "version"], [int])
                estimated_on = GET(parent_trace, m_dict, "Manifest", ["assertion", "estimatedOn"], [datetime])
                # NOTE(review): valid_types=[datetime] for recordedBy looks copy-pasted from the
                # estimatedOn line above; recordedBy is presumably a string — confirm against the
                # manifest schema before changing.
                recorded_by = GET(parent_trace, m_dict, "Manifest", ["assertion", "recordedBy"], [datetime])
                namespace = GET(parent_trace, m_dict, "Manifest", ["metadata", "namespace"], [str])
                name = GET(parent_trace, m_dict, "Manifest", ["metadata", "name"], [str])

                description_table.append([kind, version, estimated_on, recorded_by, namespace, name, env_name])

        # To ensure output to be predictable (e.g., for regression tests) we sort the description table. We found that
        # otherwise some tests that pass in Windows will fail when run in a Linux container.
        #
        KIND_IDX = 0
        VERSION_IDX = 1
        description_table = sorted(description_table, key=lambda entry: entry[KIND_IDX] + str(entry[VERSION_IDX]))

        kb_session.store.activate(parent_trace, original_env_name)

        description = "\n\n"
        description += tabulate(description_table, headers=description_headers)
        description += "\n"

        return description
Ejemplo n.º 17
0
    def check_environment_contents(self, parent_trace, snapshot_name=None):
        '''
        Helper method to validate current environment's folder hierarchy is as expected at this point in time
        in the execution of a test case.

        @param snapshot_name A string, corresponding to the output name under which the regression output should
                            be generated. If set to None, then it will be computed by calling self.next_snapshot()
        '''
        current_env = self.stack().store().current_environment(parent_trace)

        if snapshot_name == None:
            snapshot_name = self.next_snapshot()

        raw_description_dict = current_env.describe(parent_trace,
                                                    include_timestamps=False)

        # Some keys are timestamped filenames or folders, like "210915 Some_report.xlsx" (for Sep 15, 2021)
        # or even "210917.072312 Some_report.xlsx" (if report was produced at 7:32:12 am). To mask such
        # timestamps, we replace the occurrence of any 6 digits in a key by the string "<MASKED>"
        def mask_timestamps(a_dict):
            new_dict = {}
            for key in a_dict:
                raw_child = a_dict[key]
                if type(raw_child) == dict:
                    new_child = mask_timestamps(raw_child)
                else:
                    new_child = raw_child
                new_key = StringUtils().mask_timestamp(key)
                new_dict[new_key] = new_child
            return new_dict

        description_dict = mask_timestamps(raw_description_dict)

        check, explanation = DictionaryUtils().validate_path(
            parent_trace=parent_trace,
            root_dict=self.test_config_dict,
            root_dict_name="test_config.yaml",
            path_list=["regression-parameters", "xl-tolerance"],
            valid_types=[int])
        if check == False:
            raise ApodeixiError(
                parent_trace,
                "test_config.yaml misses a correct configuration for 'xl-tolerance' "
                + " under the grouping 'regression-parameters'",
                data={"explanation": explanation},
                origination={
                    'concrete class': str(self.__class__.__name__),
                    'signaled_from': __file__
                })
        TOLERANCE = self.test_config_dict["regression-parameters"][
            "xl-tolerance"]

        def tolerance_lambda(key, output_val, expected_val):
            '''
            We need to mask or tolerate differences in regression test output because of
            expected nondeterminism. Multiple cases:

            1. When an Excel filename is displayed as a key in a folder hierarchy, and it has a 
            description given by a string that includes things like "Size (in bytes):  7677",
            we want to tolerate a small deviation from the number of bytes in the size.
            For example, "Size (in bytes):  7678" would not be considered a test failure

            2. When an environment's "METADATA.yaml" file is displayed as a key in a folder hierarchy,
            its contents can't be masked because the test harness itself needs the full paths inside
            the "METADATA.yaml" when, for example, re-creating a pre-existing environment from disk
            (as it happens in CLI testing - each CLI command is a initializes a separate KnowledgeBaseStore object
            in memory, so to share environments across multiple CLI commands each successive command's
            KnowledgeBaseStore needs to load from disk the information about environments created from prior
            CLI commands, and that is what "METADATA.yaml" is for.
            Bottom line: no masking can go inside "METADATA.yaml", so its size in bytes will change if one
            relocates the test DB, since its location appears inside "METADATA.yaml". So we "accept" whatever
            byte size it has.

            3. Log output files like "POST_EVENT_LOG.txt" are normally masked in test output, so we want
            them to match expected output to the byte. *HOWEVER*, in the case of CLI tests we don't mask their
            contents to make them "more realistic" and because CLI test output doesn't show the contents of such
            log files. So to ensure CLI tests don't frivolously fail when the test_db is relocated, there is
            a setting (self.ignore_log_files_byte_size), normally set to False but which derived classes
            (such as CLI tests) can set to True. So if this flag is on, we also accept whatever byte size
            exists in log files.

            '''
            if type(key) == str and key.endswith(
                    ".xlsx"):  # This is an Excel file, apply tolerance

                # If we are running in Linux and doing regression tests, then
                # we must "inflate" the tolerance because Linux and Windows zip files differently, which results
                # in different sizes for Excel files (since an Excel file is just a set of XML files zipped up)
                if _os.name != "nt":
                    ADDITIONAL_TOLERANCE = 100
                else:
                    ADDITIONAL_TOLERANCE = 0

                output_bytes = _extract_bytes(output_val)
                expected_bytes = _extract_bytes(expected_val)
                if output_bytes == None or expected_bytes == None:  # Default to default comparison
                    return output_val == expected_val
                else:
                    return abs(output_bytes - expected_bytes
                               ) <= TOLERANCE + ADDITIONAL_TOLERANCE
            elif key == "METADATA.yaml":
                return True  # Just accept whatever number of bytes are shown, as per method documentation above
            elif self.ignore_log_files_byte_size == True and \
                        (key == "POST_EVENT_LOG.txt" or key == "FORM_REQUEST_EVENT_LOG.txt"):
                return True  # Just accept whatever number of bytes are shown, as per method documentation above.

            # If we get this far we were unable to detect the conditions for which tolerance applies, so
            # do a straight compare
            return output_val == expected_val

        def _extract_bytes(file_info_message):
            '''
            @param file_info_message A string, expected to contain subtrings like "Size (in bytes):  7677"

            @returns The number of bytes in the message as an int (7677 in the example), or None if the
                    file_info_message does not contain a substring as indicated
            '''
            if type(file_info_message) != str:
                return None
            REGEX = "Size \(in bytes\):  ([0-9]+)"
            m = _re.search(REGEX, file_info_message)
            if m == None or len(m.groups()) != 1:
                return None
            nb_bytes = int(m.group(1))
            return nb_bytes

        self._compare_yaml_within_tolerance(parent_trace=parent_trace,
                                            output_dict=description_dict,
                                            test_output_name=snapshot_name,
                                            save_output_dict=True,
                                            tolerance_lambda=tolerance_lambda)
    def infer(self, parent_trace, manifest_dict, manifest_key):
        '''
        Used in the context of generating a form to build the posting label information that should be
        embedded in the generated form.

        Accomplishes this by extracting the necessary information from the manifest given by the `manifest_dict`

        Returns a list of the fields that may be editable

        @param manifest_dict A dict object containing the information of a manifest (such as obtained after loading
                            a manifest YAML file into a dict)
        @param manifest_key A string that identifies this manifest among others. For example, "big-rock.0". Typically
                    it should be in the format <kind>.<number>
        '''
        editable_fields = super().infer(parent_trace, manifest_dict,
                                        manifest_key)

        ME = JourneysPostingLabel

        def _pull(fieldname, path_list):
            # Copies the manifest value found at path_list into the posting label field `fieldname`
            self._inferField(parent_trace=parent_trace,
                             fieldname=fieldname,
                             path_list=path_list,
                             manifest_dict=manifest_dict)

        LABELS_PATH = ["metadata", "labels"]

        # These fields are copied verbatim from the manifest's labels into the posting label
        for simple_field in [ME._PRODUCT, ME._JOURNEY, ME._SCENARIO]:
            _pull(simple_field, LABELS_PATH + [simple_field])

        is_rollover, explanation = DictionaryUtils().validate_path(
            parent_trace,
            manifest_dict,
            "Dict Name",
            LABELS_PATH + [RolloverUtils.ROLL_TO_SCORING_CYCLE],
            valid_types=['str'])
        if is_rollover:
            # As documented above in the definition of _ROLL_TO_SCORING_CYCLE, the existence of this label
            # in manifest_dict must be treated as a hint that we are in a rollover situation.
            #
            # I.e., we are generating a label for a form meant to be used to create the first manifest of
            # "the next year" (e.g., "FY 23") even though the "previous" manifest_dict is from "the prior
            # year" (e.g., "FY 22"). Thus, when we have
            #
            #                       _SCORING_CYCLE="FY 22" & _ROLL_TO_SCORING_CYCLE="FY 23"
            #
            # we should set the generated form's posting label's _SCORING_CYCLE to "FY 23", not "FY 22"
            #
            _pull(ME._SCORING_CYCLE,
                  LABELS_PATH + [RolloverUtils.ROLL_TO_SCORING_CYCLE])

            # For good measure, add a purely informative additional field to the posting label, so the user
            # can tell that the "previous manifest" from which the generated form was created is for the
            # previous year, not the current year as would be in the "usual" case
            #
            _pull(RolloverUtils.ROLL_FROM_SCORING_CYCLE,
                  LABELS_PATH + [ME._SCORING_CYCLE])
        else:
            # The "usual" case: we are not rolling from a period (e.g., FY 22) to the next one (e.g., FY 23),
            # so populate the label's SCORING_CYCLE with what the manifest_dict has for that field
            _pull(ME._SCORING_CYCLE, LABELS_PATH + [ME._SCORING_CYCLE])

        _pull(ME._SCORING_MATURITY, LABELS_PATH + [ME._SCORING_MATURITY])
        editable_fields.extend([ME._SCORING_MATURITY])

        return editable_fields
Ejemplo n.º 19
0
    def build(parent_trace, rootdir, filter=None, include_timestamps=True):
        '''
        Constructs and returns a new FolderHierarchy structure.

        @param rootdir A string, representing the root of the folder structure
        @param filter A function that takes a string as an argument and returns a boolean:

                            filter(object)

        where object is either the name of a file or the full path of a subdirectory in the
        of rootdir (i.e, object = rootdir/<something>)

        As we recurse through the descendent folders beneath `rootdir`, if the filter is not None
        then paths like rootdir/<something> will be included only if filter(rootdir/<something>) = True

        Likewise, a file called myFileName would only be included if filter(myFileName) = True

        @param include_timestamps A boolean. If False, then file timestamps are omitted (set to None)
                    and 6-digit timestamp-like substrings in the root folder's name are masked, so that
                    regression test output becomes deterministic.
        '''
        try:

            hierarchy_dict = {}
            # Split once: head is the path to the parent, tail is the root folder's own name
            path_to_parent, parent_folder = _os.path.split(rootdir)

            if include_timestamps:
                clean_parent_folder = parent_folder
            else:
                # In this case, mask all consecutive 6-digit substrings, as they are likely to be timestamps.
                # Example:
                #       When the CLI runs, it creates environments with names like '210822.162717_sandbox'
                # (for the 22nd of August of 2021, at 4:27 pm and 17 seconds). The timestamps need to be masked
                # in regression test output so that it becomes deterministic.
                clean_parent_folder = _re.sub(pattern="[0-9]{6}",
                                              repl="<MASKED>",
                                              string=parent_folder)
            hierarchy_dict[clean_parent_folder] = {}
            for currentdir, dirs, files in _os.walk(rootdir):

                # NOTE: `continue` skips this folder's files but does not prune the walk, so descendants
                #       of a filtered-out folder are still visited and filtered on their own merits
                if filter is not None and not filter(currentdir):
                    continue

                for a_file in files:
                    if filter is None or filter(a_file):
                        loop_trace = parent_trace.doing("Adding file '" +
                                                        a_file + "'")
                        relative_path = PathUtils().relativize(
                            loop_trace, path_to_parent, currentdir)
                        branch_tokens = PathUtils().tokenizePath(
                            loop_trace,
                            relative_path[0] + "/" + a_file,
                            absolute=False)

                        # If we cleaned timestamps from parent folder, also clean them from the path to the file
                        # we are looking at
                        if len(branch_tokens
                               ) > 0 and branch_tokens[0] == parent_folder:
                            branch_tokens[0] = clean_parent_folder

                        full_path = currentdir + "/" + a_file
                        if include_timestamps:
                            creation_time = _os.path.getctime(full_path)
                            access_time = _os.path.getatime(full_path)
                            modification_time = _os.path.getmtime(full_path)
                        else:
                            # Timestamps deliberately omitted so regression output is deterministic
                            creation_time = None
                            access_time = None
                            modification_time = None
                        file_size = _os.path.getsize(full_path)

                        nb_lines = FolderHierarchy._count_lines(full_path)

                        # If we are running in Linux and doing regression tests, then
                        # we must "inflate" the size of the output file because Linux uses
                        # "\n" to end a line, whereas the expected file was created in Windows that adds an extra byte per line,
                        # since Windows uses "\r\n" to end each line
                        #
                        # GOTCHA: it is possible that a file was not created by the test suite, but "copied" from some
                        #       input area under source control. Example:
                        #
                        #        test_db/knowledge-base/envs/1501_ENV/kb/manifests/my-corp.production/kb/manifests/line-of-business.1.yaml
                        #
                        # In that case, even when using Linux, such a file would contains the extra "\r" character per line,
                        # since it was created by a developer in Windows, committed to source control, and the Linux test
                        # harness simply copied it.
                        # THEREFORE: we don't "inflate" the size for files that were not created by this test run.
                        #           We can tell that if the file was created more than (say) a minute ago
                        epoch_time = int(_time.time())
                        if _os.name != "nt" and abs(epoch_time - _os.path.
                                                    getmtime(full_path)) < 60:
                            file_size += nb_lines

                        file_meta = FileMetadata(
                            filename=a_file,
                            file_size=file_size,
                            created_on=creation_time,
                            last_accessed_on=access_time,
                            last_modified_on=modification_time)
                        inner_trace = loop_trace.doing(
                            "Adding value to dict",
                            data={
                                "path_list": str(branch_tokens),
                                "val": str(file_meta)
                            },
                            origination={'signaled_from': __file__})
                        DictionaryUtils().set_val(
                            parent_trace=inner_trace,
                            root_dict=hierarchy_dict,
                            root_dict_name=clean_parent_folder,
                            path_list=branch_tokens,
                            val=file_meta)

        except ApodeixiError:
            # Already an Apodeixi-friendly error: re-raise as-is, preserving the original traceback
            raise
        except Exception as ex:
            # Wrap any unexpected error in an ApodeixiError, attaching the technical stack trace so it
            # surfaces in the functional trace of the caller
            traceback_stream = StringIO()
            trace_msg = ""
            trace_msg += "\n" + "-" * 60 + '\tTechnical Stack Trace\n\n'
            _traceback.print_exc(file=traceback_stream)
            trace_msg += traceback_stream.getvalue()
            trace_msg += "\n" + "-" * 60
            raise ApodeixiError(
                parent_trace,
                "Encountered error while building a FolderHierarchy",
                data={
                    "rootdir": str(rootdir),
                    "exception": str(ex),
                    "stack trace": trace_msg
                })

        hierarchy = FolderHierarchy(hierarchy_dict)
        return hierarchy