Example #1
    def getMonthFiscalYearStarts(self, parent_trace):
        my_trace = parent_trace.doing(
            "Retrieving Knowledge Base's fiscal year start from the Apodeixi Configuration "
        )
        SETTINGS = 'organization-settings'
        FY_START = 'month-fiscal-year-starts'
        check, explanation = DictionaryUtils().validate_path(
            parent_trace=my_trace,
            root_dict=self.config_dict,
            root_dict_name='apodeixi',
            path_list=[SETTINGS, FY_START],
            valid_types=[int])
        if not check:
            raise ApodeixiError(
                my_trace, "Can't locate Knowledge Base's fiscal year start: " +
                explanation)

        return self.config_dict[SETTINGS][FY_START]
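
DictionaryUtils().validate_path is not shown in these examples. As a rough standalone sketch of the (check, explanation) contract the call above relies on (the function name and internals below are illustrative assumptions, not Apodeixi's actual implementation):

def validate_path_sketch(root_dict, root_dict_name, path_list, valid_types):
    # Walk the nested dict one key at a time; return a False flag plus an explanation
    # if a key is missing or the leaf value has an unexpected type.
    current = root_dict
    traversed = root_dict_name
    for key in path_list:
        if not isinstance(current, dict) or key not in current:
            return False, "'" + traversed + "' has no child called '" + str(key) + "'"
        current = current[key]
        traversed += "." + str(key)
    if type(current) not in valid_types:
        return False, "'" + traversed + "' is of type " + str(type(current)) + ", not one of " + str(valid_types)
    return True, ""

config = {'organization-settings': {'month-fiscal-year-starts': 2}}
check, explanation = validate_path_sketch(config, 'apodeixi',
                                          ['organization-settings', 'month-fiscal-year-starts'],
                                          [int])
assert check == True and explanation == ""
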
Example #2
    def __init__(self,
                 parent_trace,
                 read_misses_policy,
                 use_timestamps=True,
                 path_mask=None):
        ME = KB_Environment_Config
        if not read_misses_policy in ME.READ_MISSES_POLICIES:
            raise ApodeixiError(
                parent_trace,
                "The read misses policy that was provided is not supported",
                data={
                    "read_misses_policy": str(read_misses_policy),
                    "supported policies": str(ME.READ_MISSES_POLICIES)
                })

        self.read_misses_policy = read_misses_policy
        self.use_timestamps = use_timestamps
        self.path_mask = path_mask
Example #3
def troubleshoot_command():

    root_trace = FunctionalTrace(parent_trace=None,
                                 path_mask=None).doing("Troubleshooting")

    runner = CliRunner()
    result = runner.invoke(CLI, COMMAND)

    if result.exit_code != 0:
        raise ApodeixiError(root_trace,
                            "CLI command failed",
                            data={
                                "CLI exit code": str(result.exit_code),
                                "CLI exception": str(result.exc_info),
                                "CLI output": str(result.output),
                                "CLI traceback": str(result.exc_info)
                            })
    return
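
The module-level CLI and COMMAND objects used above are not part of this example. A minimal, hypothetical stand-in built with click shows how CliRunner.invoke drives such a command in-process (the group, command and arguments below are illustrative only):

import click
from click.testing import CliRunner

@click.group()
def CLI():
    pass

@CLI.command(name="get")
@click.argument("what")
def get(what):
    click.echo("getting " + what)

COMMAND = ["get", "products"]

runner = CliRunner()
result = runner.invoke(CLI, COMMAND)
print(result.exit_code, result.output)      # 0 'getting products\n'
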
Example #4
    def path_tokens(self, parent_trace):
        '''
        Returns a list of strings, corresponding to the path tokens implied by this FilingCoordinates instance.
        '''
        if self.scoringCycle == None or self.product == None or self.scenario == None:
            raise ApodeixiError(
                parent_trace,
                "Can't provide path_tokens because JourneysFilingCoordinates is not fully built",
                data={
                    "scoringCycle": self.scoringCycle,
                    "product": self.product,
                    "scenario": self.scenario
                })

        return [
            JourneysFilingCoordinates.JOURNEYS, self.scoringCycle,
            self.product, self.scenario
        ]
Example #5
        def find_uid(self, parent_trace, row_number):
            '''
            Returns the UID (a string) associated with the given row number. If no such link is already recorded,
            returns None

            @param row_number An int representing a row number in a tabular representation of a manifest.
            '''
            if type(row_number) != int:
                raise ApodeixiError(
                    parent_trace,
                    "Can't retrieve a UID for a row number that is not an int",
                    data={"type(row_number)": str(type(row_number))})

            if row_number in self.row_2_uid.keys():
                uid = self.row_2_uid[row_number]
                return uid
            else:
                return None
Example #6
    def getKnowledgeBaseAreas(self, parent_trace):
        my_trace = parent_trace.doing(
            "Retrieving Knowledge Base's areas from the Apodeixi Configuration "
        )
        SETTINGS = 'organization-settings'
        ORGANIZATION = 'knowledge-base-areas'
        check, explanation = DictionaryUtils().validate_path(
            parent_trace=my_trace,
            root_dict=self.config_dict,
            root_dict_name='apodeixi',
            path_list=[SETTINGS, ORGANIZATION],
            valid_types=[list])
        if not check:
            raise ApodeixiError(
                my_trace,
                "Can't locate Knowledge Base's areas: " + explanation)

        return self.config_dict[SETTINGS][ORGANIZATION]
Example #7
        def preprocessReadFragment(self, parent_trace, interval,
                                   dataframe_row):
            '''
            This is called by the BreakdownTree's readDataframeFragment method before attempting to parse a fragment
            from a row in a DataFrame.

            This method is offered as a "hook" to derived classes in case they want to "enrich" the input to the parser,
            by overwriting this method with the appropriate "enriching" logic.

            It returns "potentically improved" versions of the `interval` and `dataframe_row` parameters.

            For this concrete class, the gist of what this method does is validate the input, i.e., that the
            scoring cycles can be parsed into valid FY_Quarter objects

            @param interval         An Interval object, corresponding to the columns in `row` that pertain to an entity being 
                                    processed in readDataframeFragment
            @param dataframe_row    A tuple `(idx, series)` representing a row in a larger Pandas Dataframe as yielded by
                                    the Dataframe `iterrows()` iterator.
            @returns                A pair: 1) an Interval object, and 2) tuple `(idx, series)` that may pass for a Pandas row

            '''
            scoring_cycle = dataframe_row[1][self.SCORING_CYCLE_COL]

            grandfathered = self.controller.a6i_config.getGrandfatheredScoringCycles(
                parent_trace)

            if not IntervalUtils().is_blank(
                    scoring_cycle) and not scoring_cycle in grandfathered:
                # See if we can parse it
                try:
                    time_bucket = FY_Quarter.build_FY_Quarter(
                        parent_trace, scoring_cycle)
                except ApodeixiError as ex:
                    raise ApodeixiError(
                        parent_trace,
                        "Can't process posting because there it at least one invalid scoring "
                        +
                        "cycle. Ideally they should be something like 'FY22' or 'Q2 FY25'",
                        data={
                            "invalid scoring cycle": str(scoring_cycle),
                            "parsing error": ex.msg
                        })

            return interval, dataframe_row
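
FY_Quarter.build_FY_Quarter itself is not shown here. A hedged sketch of the kind of check it might perform on scoring-cycle strings, loosely based on the formats the error message cites ('FY22' or 'Q2 FY25'); the real parsing rules may differ:

import re

def looks_like_scoring_cycle(txt):
    # Accepts strings such as 'FY22', 'FY 2025' or 'Q2 FY25' (assumed formats only)
    return re.match(r"^(Q[1-4]\s+)?FY\s*\d{2}(\d{2})?$", txt.strip()) is not None

assert looks_like_scoring_cycle("FY22")
assert looks_like_scoring_cycle("Q2 FY25")
assert not looks_like_scoring_cycle("Winter 2022")
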
Example #8
    def load_csv(self, parent_trace, path, header=0):
        '''
        Helper method to load a "clean DataFrame" from a CSV file, correcting for spurious columns and NAs
        '''
        try:
            # Important to set the encoding to "ISO-8859-1". Otherwise will get errors as explained in
            # https://stackoverflow.com/questions/19699367/for-line-in-results-in-unicodedecodeerror-utf-8-codec-cant-decode-byte
            data_df             = _pd.read_csv(path, encoding = "ISO-8859-1", header=header)
        except FileNotFoundError as ex:
            raise ApodeixiError(parent_trace, "Can't load CSV file because it doesn't exist",
                                    data = {'path':             path,
                                            'error':            str(ex)})

        data_df             = data_df.fillna('')
        SPURIOUS_COL        = 'Unnamed: 0'
        if SPURIOUS_COL in data_df.columns:
            data_df             = data_df.drop([SPURIOUS_COL], axis=1)

        # We will have to clean the data a bit, since some packaging procedures (for example,
        # creating a Conda package) introduce carriage returns '\r\n' where the original expected output only
        # has a newline '\n', causing tests to fail when users install the Conda package. So simply remove the
        # '\r' from any offending column, which typically are columns whose values are stringified arrays (i.e., strings with
        # newlines '\n' that confuse the Conda packaging). For packaging procedures that introduce no '\r', no harm is
        # done by this cleanup (i.e., expected_df is left "as is" if there are no '\r' in its 'Words per row')
        def _remove_carriage_returns(obj):
            if type(obj) == str:
                return obj.replace('\\r', '').replace('\r', '')
            elif type(obj) == list: # Remove carriages element by element
                return [_remove_carriage_returns(elt) for elt in obj]
            else:
                return obj

        # First clean the columns
        data_df.columns = [_remove_carriage_returns(col) for col in data_df.columns]

        # Now clear the cells
        for col in data_df.columns:
            data_df[col] = data_df.apply(lambda row: _remove_carriage_returns(row[col]), axis=1)

        # Clean up numbers and all else to a standard
        CLEANED                                         = DataFrameUtils().clean  # Abbreviation to express intent
        for col in data_df.columns:
            data_df[col] = data_df.apply(lambda row: CLEANED(row[col]), axis=1)
        return data_df
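
Two of the clean-ups above are easy to see in isolation: the spurious 'Unnamed: 0' column appears whenever a DataFrame was saved to CSV with its index, and the carriage-return fix is a plain string replace. A small self-contained demonstration:

import io
import pandas as _pd

# Round-tripping a DataFrame through CSV with its index produces the spurious
# 'Unnamed: 0' column that load_csv drops.
df = _pd.DataFrame({'Words per row': ["['hello',\n 'world']"]})
buffer = io.StringIO()
df.to_csv(buffer)                       # index written by default -> unnamed first column
buffer.seek(0)
round_tripped = _pd.read_csv(buffer)
print(list(round_tripped.columns))      # ['Unnamed: 0', 'Words per row']

# The '\r' cleanup is just a string replace applied to column names and cells
print("line 1\r\nline 2".replace('\r', ''))     # 'line 1\nline 2'
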
Example #9
    def getPostingConfig(self, parent_trace, kind, manifest_nb):
        '''
        Return a PostingConfig, corresponding to the configuration that this concrete controller supports.
        '''
        ME = ScoringCyclesController
        if kind in self.SUPPORTED_KINDS:
            update_policy = UpdatePolicy(reuse_uids=True, merge=False)
            xlr_config = ME._ScoringCycleConfig(kind=kind,
                                                update_policy=update_policy,
                                                manifest_nb=manifest_nb,
                                                controller=self)
        else:
            raise ApodeixiError(parent_trace,
                                "Invalid domain object '" + kind +
                                "' - should be one of " +
                                ", ".join(self.SUPPORTED_KINDS),
                                origination={'signaled_from': __file__})

        return xlr_config
Example #10
    def is_leaf(self, parent_trace, path):
        '''
        Returns True if `path` is the name of a file or folder (e.g., "my_file.txt"), as opposed to a path
        with at least some parent directories, like "project/config/my_file.txt".
        '''
        if type(path) != str:
            raise ApodeixiError(
                parent_trace,
                "The given path should be a string, but is not",
                data={
                    "type(path)": str(type(path)),
                    "str(path)": str(path)
                })

        pair = _os.path.split(path)
        if len(pair[0]) == 0:
            return True

        return False
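
The leaf test above reduces to how os.path.split behaves on the two kinds of paths the docstring mentions:

import os

print(os.path.split("my_file.txt"))                  # ('', 'my_file.txt')               -> leaf
print(os.path.split("project/config/my_file.txt"))   # ('project/config', 'my_file.txt') -> not a leaf
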
Example #11
    def get_ExternalCollaborationFolder(self, parent_trace):
        my_trace = parent_trace.doing(
            "Retrieving external collaboration root folder from the " +
            "Apodeixi Configuration ")
        KB = 'knowledge-base'
        EXTERNAL_FOLDER = 'external-collaboration-folder'
        check, explanation = DictionaryUtils().validate_path(
            parent_trace=my_trace,
            root_dict=self.config_dict,
            root_dict_name='apodeixi',
            path_list=[KB, EXTERNAL_FOLDER],
            valid_types=[str])
        if not check:
            raise ApodeixiError(
                my_trace,
                "Can't locate external collaboration folder: " + explanation)

        # Expand any environment variables in the path
        return _os.path.expandvars(self.config_dict[KB][EXTERNAL_FOLDER])
Example #12
    def get_KB_RootFolder(self, parent_trace):
        my_trace = parent_trace.doing(
            "Retrieving Knowledge Base's root folder from the Apodeixi Configuration "
        )
        KB = 'knowledge-base'
        ROOT_FOLDER = 'knowledge-base-root-folder'
        check, explanation = DictionaryUtils().validate_path(
            parent_trace=my_trace,
            root_dict=self.config_dict,
            root_dict_name='apodeixi',
            path_list=[KB, ROOT_FOLDER],
            valid_types=[str])
        if not check:
            raise ApodeixiError(
                my_trace,
                "Can't locate root folder for Knowledge Base: " + explanation)

        # Expand any environment variables in the path
        return _os.path.expandvars(self.config_dict[KB][ROOT_FOLDER])
Example #13
    def uid_from_row(self, parent_trace, manifest_identifier, row_number):
        '''
        Finds and returns the last (i.e., most granular) UID for the given row number.
        If we think of the DataFrame's row as a branch in a tree, the UID returned corresponds to the leaf
        of the branch.
        '''
        if not manifest_identifier in self.links_dict.keys():
            raise ApodeixiError(
                parent_trace,
                "Can't retrieve UID from row number because manifest has no links "
                + "associated with it",
                data={
                    "manifest_identifier": str(manifest_identifier),
                    "row_number": str(row_number)
                })

        links = self.links_dict[manifest_identifier]
        uid = links.find_uid(parent_trace, row_number)
        return uid
Example #14
    def find_foreign_uid(self, parent_trace, our_manifest_id,
                         foreign_manifest_id, our_manifest_uid, many_to_one):
        '''
        Used to establish joins between manifests by determining the foreign key to use in the join, i.e.,
        a way for "our manifest" to reference a "foreign manifest".

        Specifically, it assumes a link exists in this LinkTable between one of our manifest's UIDs and
        one of the foreign manifest's UIDs, and it finds and returns the foreign manifest's UID that our UID
        is linked to.

        @param many_to_one A boolean. If True, it means that multiple rows of our manifest may correspond to the
                                same row of the foreign manifest, and only the first such row would have displayed
                                the foreign UID. That triggers a need to "search" earlier row numbers.
        '''
        row_nb = self.row_from_uid(parent_trace=parent_trace,
                                   manifest_identifier=our_manifest_id,
                                   uid=our_manifest_uid)

        if row_nb == None:
            raise ApodeixiError(
                parent_trace,
                "Can't find foreign uid because our manifest uid does not appear to be in any row",
                data={
                    "our_manifest_id": str(our_manifest_id),
                    "our_manifest_uid": str(our_manifest_uid),
                    "foreign_manifest_id": str(foreign_manifest_id)
                })

        if many_to_one == False:  # search only in row_nb
            foreign_uid = self.uid_from_row(
                parent_trace=parent_trace,
                manifest_identifier=foreign_manifest_id,
                row_number=row_nb)
        else:  # search first in row_nb, and if nothing is found keep looking in earlier rows
            foreign_uid = None
            for current_row in reversed(range(row_nb + 1)):
                foreign_uid = self.uid_from_row(
                    parent_trace=parent_trace,
                    manifest_identifier=foreign_manifest_id,
                    row_number=current_row)
                if foreign_uid != None:
                    break

        return foreign_uid
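
A plain-dict stand-in (illustrative data only, not the real LinkTable structures) shows why the many_to_one branch walks backwards: only the first of several of "our" rows that share a foreign row carries the foreign UID.

our_uid_by_row     = {0: "BR1", 1: "BR2", 2: "BR3"}     # our manifest: row -> UID
foreign_uid_by_row = {0: "P1"}                          # foreign manifest: rows 1 and 2 left the cell blank

our_manifest_uid = "BR3"
row_nb = [row for row, uid in our_uid_by_row.items() if uid == our_manifest_uid][0]    # 2

foreign_uid = None
for current_row in reversed(range(row_nb + 1)):         # 2, 1, 0
    if current_row in foreign_uid_by_row:
        foreign_uid = foreign_uid_by_row[current_row]
        break

print(foreign_uid)      # 'P1', found in row 0 even though our UID sat in row 2
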
Example #15
    def without_comments_in_parenthesis(self, parent_trace, txt_or_tuple):
        '''
        Returns a modified version of `txt_or_tuple`.
        
        If `txt_or_tuple` is a string, then it returns a substring of `txt_or_tuple` ignoring any sub-text 
        within `txt_or_tuple` that is in parenthesis. 
        It also strips any leading or trailing spaces.
        
        For example, if txt is 'Effort (man days) to deliver', then this function returns 'Effort to deliver'

        If `txt_or_tuple` is a tuple then it applies the same logic to each string in the tuple, and returns the tuple.
        '''
        def _strip_parenthesis(parent_trace, txt):
            if type(txt) != str:
                raise ApodeixiError(
                    parent_trace,
                    "Encountered problem removing comments in parenthesis: expected a string, "
                    + "but instead was given a '" + str(type(txt)),
                    data={"invalid input": str(txt)})
            stripped_txt = StringUtils().strip(txt)
            # Remove text within parenthesis, if any, using the natural language tool nltk.tokenize.SExprTokenizer
            sexpr = SExprTokenizer(strict=False)
            sexpr_tokens = sexpr.tokenize(stripped_txt)
            parenthesis_free_tokens = [
                t for t in sexpr_tokens if not ')' in t and not '(' in t
            ]
            parenthesis_free_txt = ' '.join(parenthesis_free_tokens)
            return parenthesis_free_txt

        if type(txt_or_tuple) == str:
            return _strip_parenthesis(parent_trace, txt_or_tuple)
        elif type(
                txt_or_tuple
        ) == tuple:  # This happens when there is a MultiLevel index for the columns
            result_list = [
                _strip_parenthesis(parent_trace, txt) for txt in txt_or_tuple
            ]
            return tuple(result_list)
        else:
            raise ApodeixiError(
                parent_trace,
                "Expected column header to be a string or tuple, not a '" +
                str(type(txt_or_tuple)) + "'")
Example #16
        def find_row_number(self, parent_trace, uid):
            '''
            Returns the row number (an int) associated with the uid. If no such link is already recorded,
            returns None.

            @param uid A string representing a UID. Example: "P3.C5"
            '''
            if type(uid) != str:
                raise ApodeixiError(
                    parent_trace,
                    "Can't retrieve a row number for uid that is not an string",
                    data={"type(uid)": str(type(uid))})

            # There is some nuance to the search. Consider the case where the Product domain object needs to link to
            # the Line-of-Business domain object. A product might have a UID like "P1", which is what would be passed
            # to this method as the `uid` parameter.
            # In the simple case, self.uid_2_row might look like:
            #
            #           {'P1': 0, 'P2': 7, ...}
            #
            # so we would just return 0 as the row number.
            #
            # However, it is possible that a product has sub-products, in which case self.uid_2_row might look like
            #
            #           {'P1.SP1': 0, 'P1.SP2': 1, 'P2': 7, ...}
            #
            # So nothing would be found under key 'P1', and we might inadvertently return a null row number that is both
            # incorrect and causes other exceptions downstream.
            # So in the algorithm used here, we look at all keys that are either "P1" or start with "P1.", and take the
            # one for which the row number is smallest, and return that
            #
            candidate_keys = [
                key for key in self.uid_2_row.keys()
                if key == uid or key.startswith(uid + ".")
            ]

            if len(candidate_keys) == 0:
                return None
            else:
                row_number = min(
                    [self.uid_2_row[key] for key in candidate_keys])
                return row_number
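
A worked instance of the candidate-key search described in the comment above, using the sub-product case (the data is illustrative):

uid_2_row = {'P1.SP1': 0, 'P1.SP2': 1, 'P2': 7}     # UID -> row number
uid = 'P1'

candidate_keys = [key for key in uid_2_row.keys()
                  if key == uid or key.startswith(uid + ".")]
print(candidate_keys)                                   # ['P1.SP1', 'P1.SP2']
print(min(uid_2_row[key] for key in candidate_keys))    # 0, the row to return for 'P1'
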
Example #17
    def unabbreviate_uid(self, parent_trace, uid, acronym_schema):
        '''
        Returns a possibly modified UID. For example, a UID like "P4.3" might be replaced by "P4.C3".
        In other words, if the uid is one of those "abbreviated UIDs" that lacks acronyms (they arise
        for usability reasons in user-provided UIDs), attempt to infer the acronyms that are missing and
        return the full UID ("P4.C3" in the example)

        Additionally, if a UID skipped an entity, this method relies on a '0' digit to determine that. For example, if the
        entity schema is logically like  [A, I, SI, AS], if a full UID is A4.I3.AS2, then the SI entity
        was skipped.
        In that case, the correct abbreviated UID should be A4.3.0.2, instead of A4.3.2.

        That makes it possible for this method to line up 1-1 the tokens of the abbreviated UID to the
        acronym schema, to infer the correct unabbreviated UID. In the example, that would be inferring that
        A4.3.0.2 corresponds to A4.I3.AS2. Without the "0" digit, if we had A4.3.2, we would have 
        incorrectly inferred A4.I3.I2

        @param acronym_schema Used to determine what acronyms to use in the full UID that is returned.
        '''
        # Path of the acronyms the store knows about so far. May not yet include the entity, if we
        # are adding a uid for that entity for the first time

        acronym_list = [
            acronyminfo.acronym
            for acronyminfo in acronym_schema.acronym_infos()
        ]

        # Calling self.tokenize produces "unabbreviated" tokens
        tokens = UID_Utils().tokenize(parent_trace, uid, acronym_list)
        if len(tokens) == 0:
            raise ApodeixiError(
                parent_trace,
                "Unable to parse and unabbreviate uid '" + str(uid) + "'")

        full_uid = ".".join(tokens)

        # Due to the possibility that the end user skipped some entities, need to pad
        # the UID before returning it. That is because in the current implementation our call to
        # self.tokenize wiped out the padding that might have existed in the abbreviated uid
        padded_uid = acronym_schema.pad_uid(parent_trace, full_uid)

        return padded_uid
Example #18
    def manifestLabelsFromCoords(self, parent_trace, subnamespace, coords):
        '''
        Helper method that returns a dict whose keys are the label field names that should be populated
        inside a manifest based on the parameters, and whose values are the values each label should have.

        Usually used in the context of generating forms.

        Example: consider a manifest name like "modernization.dec-2020.fusionopus.default"
                in namespace "my-corp.production", that arose from a posting for product "Fusion Opus",
                scoring cycle "Dec 2020" and scenario "Default".

                Then this method returns ["modernization", "Dec 2020", "Fusion Opus", "Default"].

        @param subnamespace A string, which is allowed to be None. If not null, this is a further partitioning of
                        the namespace into finer slices, and a manifest's name is supposed to identify the slice
                        in which the manifest resides.

        @param coords A FilingCoords object corresponding to this controller. It is used, possibly along with the
                        `subnamespace` parameter, to build a manifest name.
        '''
        if not type(coords) == InitiativesFilingCoordinates:
            raise ApodeixiError(
                parent_trace,
                "Can't build manifest name because received wrong type of filing coordinates",
                data={
                    "Type of coords received": str(type(coords)),
                    "Expected type of coords": "InitiativesFilingCoordinates"
                })

        workstream_UID = coords.workstream_UID
        initiative = subnamespace
        scenario = coords.scenario
        scoring_cycle = coords.scoring_cycle

        MY_PL = Workstream_Controller._MyPostingLabel  # Abbreviation for readability
        result_dict = {}
        result_dict[MY_PL._WORKSTREAM_UID] = workstream_UID
        result_dict[MY_PL._INITIATIVE] = initiative
        result_dict[MY_PL._SCENARIO] = scenario
        result_dict[MY_PL._SCORING_CYCLE] = scoring_cycle

        return result_dict
Example #19
    def parse_sandbox_announcement(self, parent_trace, announcement):
        '''
        Validates that the `announcement` is of the form "Using sandbox '210821.142725_sandbox'",
        and if so it returns the name of the sandbox, which in the example is '210821.142725_sandbox'

        If `announcement` does not conform with the expected form, this method raises an ApodeixiError
        '''
        ME                      = CLI_Utils
        if len(announcement) != (len(ME.PREFIX_EXPECTED) + ME.SANDBOX_NAME_LENGTH + len(ME.SUFFIX_EXPECTED)) \
            or not announcement.startswith(ME.PREFIX_EXPECTED) \
            or not announcement.endswith(ME.SUFFIX_EXPECTED):
            raise ApodeixiError(parent_trace, "Announcement is not in the expected form",
                                    data = {"announcement":     announcement,
                                            "expected":  self.sandox_announcement("<sandbox name>")})

        sandbox_name_start_idx  = len(ME.PREFIX_EXPECTED)
        sandbox_name_end_idx    = sandbox_name_start_idx + ME.SANDBOX_NAME_LENGTH
        sandbox_name            = announcement[sandbox_name_start_idx:sandbox_name_end_idx]

        return sandbox_name 
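
Plausible values for the CLI_Utils constants, inferred from the docstring's example (the real class attributes are not shown in this excerpt), make the length check and the slice easy to follow:

PREFIX_EXPECTED     = "Using sandbox '"                  # assumed value
SUFFIX_EXPECTED     = "'"                                # assumed value
SANDBOX_NAME_LENGTH = len("210821.142725_sandbox")       # 21, assumed value

announcement = "Using sandbox '210821.142725_sandbox'"
assert len(announcement) == len(PREFIX_EXPECTED) + SANDBOX_NAME_LENGTH + len(SUFFIX_EXPECTED)

start = len(PREFIX_EXPECTED)
end   = start + SANDBOX_NAME_LENGTH
print(announcement[start:end])                           # '210821.142725_sandbox'
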
Example #20
    def is_sublist(self, parent_trace, super_list, alleged_sub_list):
        '''
        Checks if `alleged_sub_list` is a sublist of `super_list`. Returns a boolean stating whether it is a sublist, as well
        as two lists, pre_list and post_list, that are "split" by the `alleged_sub_list`.
        If the boolean is True then the following will hold true:

            super_list == pre_list + alleged_sub_list + post_list
        
        If on the other hand the boolean is False, then both `pre_list` and `post_list` are None.

        If either the super_list or the alleged_sub_list is empty then it returns False (with both lists None).
        '''
        if type(super_list) != list or type(alleged_sub_list) != list:
            raise ApodeixiError(
                parent_trace,
                "Can't determine if we have a sub list because was given wrong types, not lists",
                data={
                    'type of super_list': str(type(super_list)),
                    'type of alleged_sub_list': str(type(alleged_sub_list))
                })
        if len(super_list) == 0 or len(alleged_sub_list) == 0:
            return False, None, None

        # Get the indices in super_list for the first element of alleged_sub_list that leave enough room for the
        # alleged_sub_list to fit after that. These are candidate locations for a split
        sub_length = len(alleged_sub_list)
        candidate_idxs = [
            idx for idx, x in enumerate(super_list)
            if x == alleged_sub_list[0] and len(super_list[idx:]) >= sub_length
        ]

        # Now see if any of the candidate split locations work
        for idx in candidate_idxs:
            if alleged_sub_list == super_list[idx:idx +
                                              sub_length]:  # Found a match!
                pre_list = super_list[:idx]
                post_list = super_list[idx + sub_length:]
                return True, pre_list, post_list

        # If we get this far, there is no match
        return False, None, None
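
A small worked example of the split property promised above (expected outcome written out by hand):

super_list       = ['a', 'b', 'c', 'd', 'e']
alleged_sub_list = ['c', 'd']

# Calling is_sublist(parent_trace, super_list, alleged_sub_list) should yield
#   (True, ['a', 'b'], ['e'])
pre_list, post_list = ['a', 'b'], ['e']
assert super_list == pre_list + alleged_sub_list + post_list
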
Example #21
        def generateNextUID(self, parent_trace, branch, acronym):
            '''
            @param branch A list of pairs that specify a branch in the _TokenTree. For example:
                          [['P', 12], ['AC', 3], ['E', 45]]. If the acronym is 'W', it will add a node
                          [['W', 5]], say, if P12.AC3.E45.W1,2,3,4 already exist.
                          Returns two uids: a full UID P12.AC3.E45.W5 and the leaf UID W5
            '''
            # Validate acronym is valid
            REGEX = '^([a-zA-Z]+)$'
            m = _re.match(REGEX, acronym)
            if m == None or len(m.groups()) != 1:
                raise ApodeixiError(
                    parent_trace, "Invalid acronym='" + acronym +
                    "': expected something like 'P' or 'AV'.  " + "Level=" +
                    str(self.level))

            if len(branch) == 0:
                # We hit bottom
                leaf_uid = self._generateHere(parent_trace, acronym)
                full_uid = leaf_uid

            else:
                head = branch[0]
                tail = branch[1:]

                #       See GOTCHA comment in self.generateNextUID. It might be that we have received UIDs like
                # "BR1.SR0.TR1", and the "SR0" token is there only to enforce that TR is a level below SR, so that
                # the two are never in competition (which creates bugs). So if we have received an head like
                # "SR0", we shouldn't error out - just ignore it for purposes of walking down the tree, but
                # include it in the full_uid we return
                head_acronym, head_val = UID_Utils().parseToken(parent_trace,
                                                                token=head)
                if head_val == 0:
                    next_tree = self
                else:
                    next_tree = self._findChild(parent_trace, head)
                tail_uid, leaf_uid = next_tree.generateNextUID(
                    parent_trace, tail, acronym)
                full_uid = head + '.' + tail_uid

            return full_uid, leaf_uid
Example #22
    def inferFilingCoords(self, parent_trace, posting_label):
        '''
        After a PostingLabel is read with self as the posting handle, we can read from the Posting Label
        what the "real" FilingCoords should have been, and the caller can use this method to get
        such "inferred" FilingCoordinates and replace self (a "TBD" FilingCoords) with the "real" one
        '''
        my_trace = parent_trace.doing(
            "Looking up filing class for given posting API",
            data={'posting_api': self._posting_api})
        filing_class = posting_label.controller.store.getFilingClass(
            parent_trace, self._posting_api)
        if filing_class == None:
            raise ApodeixiError(
                my_trace,
                "Can't build filing coordinates from a null filing class")
        my_trace = parent_trace.doing(
            "Validating that posting is in the right folder structure " +
            "within the Knowledge Base")
        filing_coords = filing_class().infer_from_label(
            parent_trace=my_trace, posting_label=posting_label)
        self._inferred_coords = filing_coords
Example #23
    def _filename_2_api(self, parent_trace, filename):
        '''
        Helper method that can be used by derived classes to infer the posting api from a filename.

        Returns a string: the posting api. Raises an ApodeixiError if none of the store's supported apis matches the filename.
        '''
        posting_api = None
        supported_apis = self.supported_apis(parent_trace=parent_trace)
        for api in supported_apis:
            if filename.endswith(api + ".xlsx"):
                posting_api = api
                break
        if posting_api == None:
            raise ApodeixiError(
                parent_trace,
                "Filename is not for an API supported by the Knowledge Base store",
                data={
                    'filename': filename,
                    'supported apis': str(supported_apis)
                })
        return posting_api
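
The suffix convention the loop relies on, shown with hypothetical posting API names (the store's real supported_apis list is not part of this example):

supported_apis = ['big-rocks.journeys.a6i', 'workstream.initiatives.a6i']   # illustrative only
filename = 'pbf_opus.original.OPUS_big-rocks.journeys.a6i.xlsx'             # hypothetical filename

matches = [api for api in supported_apis if filename.endswith(api + '.xlsx')]
print(matches)      # ['big-rocks.journeys.a6i']
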
Example #24
    def _posting_testing_skeleton(self, store, test_case_name, excel_file):

        all_manifests_dicts                     = []

        try:
            root_trace                          = FunctionalTrace(parent_trace=None, path_mask=self._path_mask).doing("Posting excel file", 
                                                                                data={  'excel_file'    : excel_file},
                                                                                origination = {
                                                                                        'signaled_from' : __file__,
                                                                                        'concrete class': str(self.__class__.__name__)})

            kbase                               = KnowledgeBase(root_trace, store, a6i_config=self.a6i_config)

            response, log_txt                   = kbase.postByFile( parent_trace                = root_trace, 
                                                                    path_of_file_being_posted   = excel_file,
                                                                    excel_sheet                 = "Sheet1")

            NB_MANIFESTS_EXPECTED               = 3
            if len(response.createdManifests()) != NB_MANIFESTS_EXPECTED:
                raise ApodeixiError(root_trace, 'Expected ' + str(NB_MANIFESTS_EXPECTED) + ' manifests, but found '
                                    + str(len(response.createdManifests())))

            # Retrieve the manifests created
            manifest_dict                       = {}
            for handle in response.createdManifests():
                loop_trace                      = root_trace.doing("Retrieving manifest for handle " + str(handle),
                                                        origination = {    
                                                                    'concrete class': str(self.__class__.__name__), 
                                                                    'signaled_from': __file__})
                manifest_dict, manifest_path    = store.retrieveManifest(loop_trace, handle)
                self._compare_to_expected_yaml(loop_trace, manifest_dict, test_case_name + "." + handle.kind)

            return
        except ApodeixiError as ex:
            print(ex.trace_message()) 
            self.assertTrue(1==2)                 

        # If we get this far, the tests failed since we should have returned within the try statement. 
        # So hardcode an informative failure.
        self.assertTrue("Shouldn't have gotten to this line" == 0)                                                                      
Example #25
    def get_environment_filter(self, parent_trace, kb_session, filter_type, sandbox):
        '''
        Returns a lambda that can be used as a filter for environments, whenever searching for objects
        across the KnowledgeBaseStore.

        @param filter_type A string. Must be one of: 
            * CLI_Utils.ONLY_BASE_ENV_FILTER
            * CLI_Utils.ANY_ENV_FILTER
            * CLI_Utils.SPECIFIC_SANDBOX_ENV_FILTER

        @param sandbox A string, possibly null, corresponding to the name of a sandbox environment.
            It is only relevant for filter_type=CLI_Utils.SPECIFIC_SANDBOX_ENV_FILTER, to denote
            the sandbox that is allowed.
        '''

        # Define the possible environment filters
        def _only_base_env_filter(env_name):
            if env_name == kb_session.store.base_environment(parent_trace).name(parent_trace):
                return True
            return False

        def _any_env_filter(env_name):
            return True

        def _specific_sandbox_env_filter(env_name):
            if env_name == sandbox:
                return True
            return False

        if filter_type == CLI_Utils.ONLY_BASE_ENV_FILTER:
            return _only_base_env_filter
        elif filter_type == CLI_Utils.ANY_ENV_FILTER:
            return _any_env_filter
        elif filter_type == CLI_Utils.SPECIFIC_SANDBOX_ENV_FILTER:
            return _specific_sandbox_env_filter
        else:
            raise ApodeixiError(parent_trace, "Unknown filter type '" + str(filter_type) + "'",
                                            data = {"allowed filter types": str([CLI_Utils.ONLY_BASE_ENV_FILTER,
                                                                            CLI_Utils.ANY_ENV_FILTER,
                                                                            CLI_Utils.SPECIFIC_SANDBOX_ENV_FILTER])})
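
A standalone reduction of the closure-based filtering above (environment names are illustrative): the chosen filter is simply applied to whatever environment names a later search encounters.

def make_specific_sandbox_filter(sandbox):
    def _filter(env_name):
        return env_name == sandbox
    return _filter

env_names = ['base_environment', '210821.142725_sandbox', '210902.083011_sandbox']
only_my_sandbox = make_specific_sandbox_filter('210821.142725_sandbox')
print([name for name in env_names if only_my_sandbox(name)])    # ['210821.142725_sandbox']
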
Example #26
    def path_tokens(self, parent_trace):
        '''
        Returns a list of strings, corresponding to the path tokens implied by this FilingCoordinates instance.
        '''
        if self.scoringCycle == None or self.initiative == None or self.workstream_UID == None:
            raise ApodeixiError(
                parent_trace,
                "Can't provide path_tokens because InitiativesFilingCoordinates is not "
                + "fully built",
                data={
                    "scoringCycle": self.scoringCycle,
                    "initiative": self.initiative,
                    "workstream_UID": self.workstream_UID,
                    "scenario": self.scenario
                })

        # Note: we save to a path like
        #
        #               .../excel-postings/initiatives/FY 22/s1.w0/w0-workstream.initiatives.a6i.xlsx
        #
        # as opposed to one like
        #
        #               .../excel-postings/initiatives/FY 22/s1/w0/w0-workstream.initiatives.a6i.xlsx
        #
        # hence the need to assemble the "mixed field" that in the example would be "s1.w0"
        mixed_field = self.initiative + "." + self.workstream_UID
        if self.scenario != None:
            result = [
                InitiativesFilingCoordinates.INITIATIVES, self.scoringCycle,
                mixed_field, self.scenario
            ]
        else:
            result = [
                InitiativesFilingCoordinates.INITIATIVES, self.scoringCycle,
                mixed_field
            ]

        return result
Example #27
    def row_from_uid(self, parent_trace, manifest_identifier, uid):
        '''
        This is the inverse function to uid_from_row.

        It finds and returns the unique dataframe row number for the row that contains the given uid as its
        last UID.

        If we think of the DataFrame rows as branches in a tree, then this returns the branch number given
        the UID of the branch's leaf node.
        '''
        if not manifest_identifier in self.links_dict.keys():
            raise ApodeixiError(
                parent_trace,
                "Can't retrieve row number from UID because manifest has no links "
                + "associated with it",
                data={
                    "manifest_identifier": str(manifest_identifier),
                    "uid": str(uid)
                })

        links = self.links_dict[manifest_identifier]
        row_number = links.find_row_number(parent_trace, uid)
        return row_number
Example #28
        def preflightPostingValidation(self, parent_trace, posted_content_df):
            '''
            Method performs some initial validation of the `dataframe`, which is intended to be a DataFrame representation of the
            data posted in Excel.

            The intention for this preflight validation is to provide the user with more user-friendly error messages that
            educate the user on what he/she should change in the posting for it to be valid. In the absence of this 
            preflight validation, the posting error from the user would eventually be caught deeper in the parsing logic,
            by which time the error generated might not be too user friendly.

            Thus this method is not so much to avoid corruption of the data, since downstream logic will prevent corruption
            anyway. Rather, it is to provide usability by outputting high-level user-meaningful error messages.
            '''
            ME                              = Mock_Controller._AccountPropertiesConfig
            posted_cols                     = list(posted_content_df.columns)
            mandatory_cols                  = [ME._ENTITY_NAME]
            mandatory_cols.extend(ME._SPLITTING_COLUMNS)
            missing_cols                    = [col for col in mandatory_cols if not col in posted_cols]
            if len(missing_cols) > 0:
                raise ApodeixiError(parent_trace, "Posting lacks some mandatory columns. This often happens if "
                                                    + "ranges are wrong in Posting Label.",
                                                    data = {    'Missing columns':    missing_cols,
                                                                'Posted columns':     posted_cols})
Example #29
    def buildIntervals(self, parent_trace, linear_space):
        '''
        '''
        #if self.entity_name == None:
        # Overwrite self.entity_name to be consistent with the linear space given
        self.entity_name = IntervalUtils().infer_first_entity(
            parent_trace, linear_space)

        my_trace = parent_trace.doing(
            "Validating mandatory columns are present")

        missing_cols = [
            col for col in self.mandatory_columns if not col in linear_space
        ]
        if len(missing_cols) > 0:
            raise ApodeixiError(my_trace,
                                "Posting lacks some mandatory columns",
                                data={
                                    'Missing columns': missing_cols,
                                    'Posted columns': linear_space
                                })

        return [Interval(parent_trace, linear_space, self.entity_name)]
Example #30
    def add_known_uid(self, parent_trace, uid, last_acronym=None):
        '''
        Records that the `uid` is already used, and therefore no generated UID should be like it.

        Use with caution: normally this method should not be used, since a user's posting
        should already include a UID that was generated previously during processing of an earlier posting
        that gave rise to a manifest being persisted. That manifest would have the UID, and normally if the
        user makes an update to the posting, that update controller logic would call the method
        `initializeFromManifest` on the UID Store to seed the UID Store with such previously generated
        UIDs.

        So it is rare to need to *forcefully* tell the UID store that a UID is already reserved,
        and this happens mainly in internal Apodeixi low-level code, not in application code.

        @param uid A string such as "JTBD1.C1.F1.S1"
        @param last_acronym A string for the entity of the leaf UID. In the example "JTBD1.C1.F1.S1",
                    perhaps "S" stands for "Story", and "S" would be the last_acronym passed.
                    The reason for needing this parameter is that for usability reasons the user may
                    abbreviate the UID to something like "1.1.1.1", and the system needs to infer 
                    the acronyms. The UID Store would already know about the ancestors (JTBD, C, F) but
                    might not yet know about the leaf UID acronym ("S"), which is therefore passed
                    by the caller (the caller typically is the BreakdownTree class that would know
                    how to get such "last acronym")
        '''
        if self.acronym_schema == None:
            raise ApodeixiError(
                parent_trace,
                "Detected incorrectly built UID_Store while adding a known uid: this "
                + "UID_Store's acronym schema is not initialized",
                {"uid": str(uid)})

        known_acronym_list = [
            info.acronym for info in self.acronym_schema.acronym_infos()
        ]

        self._mark_uid_as_used(parent_trace, uid, known_acronym_list,
                               self.tree)