def getMonthFiscalYearStarts(self, parent_trace):
    my_trace = parent_trace.doing("Retrieving Knowledge Base's fiscal year start from the Apodeixi Configuration")
    SETTINGS = 'organization-settings'
    FY_START = 'month-fiscal-year-starts'
    check, explanation = DictionaryUtils().validate_path(parent_trace=my_trace,
                                                         root_dict=self.config_dict,
                                                         root_dict_name='apodeixi',
                                                         path_list=[SETTINGS, FY_START],
                                                         valid_types=[int])
    if not check:
        raise ApodeixiError(my_trace, "Can't locate Knowledge Base's fiscal year start: " + explanation)
    return self.config_dict[SETTINGS][FY_START]
def __init__(self, parent_trace, read_misses_policy, use_timestamps=True, path_mask=None):
    ME = KB_Environment_Config
    if read_misses_policy not in ME.READ_MISSES_POLICIES:
        raise ApodeixiError(parent_trace,
                            "The read misses policy that was provided is not supported",
                            data={"read_misses_policy": str(read_misses_policy),
                                  "supported policies": str(ME.READ_MISSES_POLICIES)})
    self.read_misses_policy = read_misses_policy
    self.use_timestamps = use_timestamps
    self.path_mask = path_mask
def troubleshoot_command():
    root_trace = FunctionalTrace(parent_trace=None, path_mask=None).doing("Troubleshooting")
    runner = CliRunner()
    result = runner.invoke(CLI, COMMAND)
    if result.exit_code != 0:
        # CliRunner exposes a single exc_info tuple, so it is reported under both keys below
        raise ApodeixiError(root_trace, "CLI command failed",
                            data={"CLI exit code": str(result.exit_code),
                                  "CLI exception": str(result.exc_info),
                                  "CLI output": str(result.output),
                                  "CLI traceback": str(result.exc_info)})
def path_tokens(self, parent_trace):
    '''
    Returns a list of strings, corresponding to the path tokens implied by this FilingCoordinates instance.
    '''
    if self.scoringCycle == None or self.product == None or self.scenario == None:
        raise ApodeixiError(parent_trace,
                            "Can't provide path_tokens because JourneysFilingCoordinates is not fully built",
                            data={"scoringCycle": self.scoringCycle,
                                  "product": self.product,
                                  "scenario": self.scenario})
    return [JourneysFilingCoordinates.JOURNEYS, self.scoringCycle, self.product, self.scenario]
def find_uid(self, parent_trace, row_number):
    '''
    Returns the UID (a string) associated with the given row number. If no such link is already recorded,
    returns None.

    @param row_number An int representing a row number in a tabular representation of a manifest.
    '''
    if type(row_number) != int:
        raise ApodeixiError(parent_trace,
                            "Can't retrieve a UID for a row number that is not an int",
                            data={"type(row_number)": str(type(row_number))})
    if row_number in self.row_2_uid.keys():
        return self.row_2_uid[row_number]
    else:
        return None
def getKnowledgeBaseAreas(self, parent_trace):
    my_trace = parent_trace.doing("Retrieving Knowledge Base's areas from the Apodeixi Configuration")
    SETTINGS = 'organization-settings'
    AREAS = 'knowledge-base-areas'
    check, explanation = DictionaryUtils().validate_path(parent_trace=my_trace,
                                                         root_dict=self.config_dict,
                                                         root_dict_name='apodeixi',
                                                         path_list=[SETTINGS, AREAS],
                                                         valid_types=[list])
    if not check:
        raise ApodeixiError(my_trace, "Can't locate Knowledge Base's areas: " + explanation)
    return self.config_dict[SETTINGS][AREAS]
def preprocessReadFragment(self, parent_trace, interval, dataframe_row):
    '''
    This is called by the BreakdownTree's readDataframeFragment method before attempting to parse a fragment
    from a row in a DataFrame.

    This method is offered as a "hook" to derived classes in case they want to "enrich" the input to the
    parser, by overwriting this method with the appropriate "enriching" logic.

    It returns "potentially improved" versions of the `interval` and `dataframe_row` parameters.

    For this concrete class, the gist of what this method does is validate the input, i.e., that the scoring
    cycles can be parsed into valid FY_Quarter objects.

    @param interval An Interval object, corresponding to the columns in `row` that pertain to an entity being
                processed in readDataframeFragment.
    @param dataframe_row A tuple `(idx, series)` representing a row in a larger Pandas DataFrame as yielded
                by the DataFrame `iterrows()` iterator.
    @returns A pair: 1) an Interval object, and 2) a tuple `(idx, series)` that may pass for a Pandas row.
    '''
    scoring_cycle = dataframe_row[1][self.SCORING_CYCLE_COL]
    grandfathered = self.controller.a6i_config.getGrandfatheredScoringCycles(parent_trace)
    if not IntervalUtils().is_blank(scoring_cycle) and not scoring_cycle in grandfathered:
        # See if we can parse it. The parsed result is discarded: at this stage we only care that
        # parsing succeeds.
        try:
            time_bucket = FY_Quarter.build_FY_Quarter(parent_trace, scoring_cycle)
        except ApodeixiError as ex:
            raise ApodeixiError(parent_trace,
                                "Can't process posting because there is at least one invalid scoring "
                                + "cycle. Ideally they should be something like 'FY22' or 'Q2 FY25'",
                                data={"invalid scoring cycle": str(scoring_cycle),
                                      "parsing error": ex.msg})
    return interval, dataframe_row
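# Illustrative behavior of the validation above: FY_Quarter.build_FY_Quarter(parent_trace, "Q2 FY25")
# parses successfully, whereas a value like "Spring 2025" would raise, triggering the user-friendly
# ApodeixiError above, unless that value is listed as a grandfathered scoring cycle in the
# Apodeixi configuration.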
def load_csv(self, parent_trace, path, header=0):
    '''
    Helper method to load a "clean DataFrame" from a CSV file, correcting for spurious columns and NAs.
    '''
    try:
        # Important to set the encoding to "ISO-8859-1". Otherwise will get errors as explained in
        # https://stackoverflow.com/questions/19699367/for-line-in-results-in-unicodedecodeerror-utf-8-codec-cant-decode-byte
        data_df = _pd.read_csv(path, encoding="ISO-8859-1", header=header)
    except FileNotFoundError as ex:
        raise ApodeixiError(parent_trace, "Can't load CSV file because it doesn't exist",
                            data={'path': path, 'error': str(ex)})
    data_df = data_df.fillna('')
    SPURIOUS_COL = 'Unnamed: 0'
    if SPURIOUS_COL in data_df.columns:
        data_df = data_df.drop([SPURIOUS_COL], axis=1)

    # We will have to clean the data a bit, since some packaging procedures (for example, creating a Conda
    # package) introduce some carriage returns '\r\n' where the original expected output only has a newline
    # '\n', causing tests to fail when users install the Conda package. So simply remove the '\r' from any
    # offending column, which typically are columns whose values are stringified arrays (i.e., strings with
    # newlines '\n' that confuse the Conda packaging). For packaging procedures that have no '\r', no harm is
    # done by this cleanup (i.e., expected_df is left "as is" if there are no '\r' in its 'Words per row')
    def _remove_carriage_returns(obj):
        if type(obj) == str:
            return obj.replace('\\r', '').replace('\r', '')
        elif type(obj) == list:  # Remove carriage returns element by element
            return [_remove_carriage_returns(elt) for elt in obj]
        else:
            return obj

    # First clean the columns
    data_df.columns = [_remove_carriage_returns(col) for col in data_df.columns]
    # Now clean the cells
    for col in data_df.columns:
        data_df[col] = data_df.apply(lambda row: _remove_carriage_returns(row[col]), axis=1)

    # Clean up numbers and all else to a standard
    CLEANED = DataFrameUtils().clean  # Abbreviation to express intent
    for col in data_df.columns:
        data_df[col] = data_df.apply(lambda row: CLEANED(row[col]), axis=1)
    return data_df
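# Hypothetical usage sketch: in a regression test, load the expected output saved as a CSV fixture and
# compare it to a freshly computed DataFrame. The trace, fixture path and comparison shown here are
# illustrative only, not part of this class:
#
#   root_trace  = FunctionalTrace(parent_trace=None, path_mask=None).doing("Loading expected output")
#   expected_df = self.load_csv(root_trace, path=EXPECTED_FOLDER + "/my_test_EXPECTED.csv")
#   self.assertTrue(actual_df.equals(expected_df))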
def getPostingConfig(self, parent_trace, kind, manifest_nb):
    '''
    Return a PostingConfig, corresponding to the configuration that this concrete controller supports.
    '''
    ME = ScoringCyclesController
    if kind in self.SUPPORTED_KINDS:
        update_policy = UpdatePolicy(reuse_uids=True, merge=False)
        xlr_config = ME._ScoringCycleConfig(kind=kind,
                                            update_policy=update_policy,
                                            manifest_nb=manifest_nb,
                                            controller=self)
    else:
        raise ApodeixiError(parent_trace,
                            "Invalid domain object '" + kind + "' - should be one of "
                            + ", ".join(self.SUPPORTED_KINDS),
                            origination={'signaled_from': __file__})
    return xlr_config
def is_leaf(self, parent_trace, path):
    '''
    Returns True if `path` is the name of a file or folder (e.g., "my_file.txt"), as opposed to a path with
    at least some parent directories, like "project/config/my_file.txt" would be.
    '''
    if type(path) != str:
        raise ApodeixiError(parent_trace,
                            "The given path should be a string, but is not",
                            data={"type(path)": str(type(path)),
                                  "str(path)": str(path)})
    pair = _os.path.split(path)
    if len(pair[0]) == 0:
        return True
    return False
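# Expected behavior (illustrative):
#   is_leaf(trace, "my_file.txt")                -> True   (os.path.split yields an empty head)
#   is_leaf(trace, "project/config/my_file.txt") -> False  (head is "project/config")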
def get_ExternalCollaborationFolder(self, parent_trace):
    my_trace = parent_trace.doing("Retrieving external collaboration root folder from the "
                                  + "Apodeixi Configuration")
    KB = 'knowledge-base'
    EXTERNAL_FOLDER = 'external-collaboration-folder'
    check, explanation = DictionaryUtils().validate_path(parent_trace=my_trace,
                                                         root_dict=self.config_dict,
                                                         root_dict_name='apodeixi',
                                                         path_list=[KB, EXTERNAL_FOLDER],
                                                         valid_types=[str])
    if not check:
        raise ApodeixiError(my_trace, "Can't locate external collaboration folder: " + explanation)
    # Expand any environment variables in the path
    return _os.path.expandvars(self.config_dict[KB][EXTERNAL_FOLDER])
def get_KB_RootFolder(self, parent_trace):
    my_trace = parent_trace.doing("Retrieving Knowledge Base's root folder from the Apodeixi Configuration")
    KB = 'knowledge-base'
    ROOT_FOLDER = 'knowledge-base-root-folder'
    check, explanation = DictionaryUtils().validate_path(parent_trace=my_trace,
                                                         root_dict=self.config_dict,
                                                         root_dict_name='apodeixi',
                                                         path_list=[KB, ROOT_FOLDER],
                                                         valid_types=[str])
    if not check:
        raise ApodeixiError(my_trace, "Can't locate root folder for Knowledge Base: " + explanation)
    # Expand any environment variables in the path
    return _os.path.expandvars(self.config_dict[KB][ROOT_FOLDER])
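# For reference, the configuration fragment that get_KB_RootFolder and get_ExternalCollaborationFolder
# read looks roughly like this (illustrative YAML; the keys match the constants above, the values are
# hypothetical and may contain environment variables, which os.path.expandvars expands):
#
#   knowledge-base:
#       knowledge-base-root-folder:     "${A6I_ROOT}/kb"
#       external-collaboration-folder:  "${A6I_ROOT}/external-collaboration"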
def uid_from_row(self, parent_trace, manifest_identifier, row_number):
    '''
    Finds and returns the last (i.e., most granular) UID for the given row number. If we think of the
    DataFrame's row as a branch in a tree, the UID returned corresponds to the leaf of the branch.
    '''
    if not manifest_identifier in self.links_dict.keys():
        raise ApodeixiError(parent_trace,
                            "Can't retrieve UID from row number because manifest has no links "
                            + "associated with it",
                            data={"manifest_identifier": str(manifest_identifier),
                                  "row_number": str(row_number)})
    links = self.links_dict[manifest_identifier]
    uid = links.find_uid(parent_trace, row_number)
    return uid
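# Illustrative (hypothetical manifest identifier and links): if the links recorded for manifest
# 'big-rocks.0' map row 3 to UID 'BR1.B2', then
#
#   uid_from_row(trace, 'big-rocks.0', 3)         -> 'BR1.B2'
#   row_from_uid(trace, 'big-rocks.0', 'BR1.B2')  -> 3    (the inverse method, further below)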
def find_foreign_uid(self, parent_trace, our_manifest_id, foreign_manifest_id, our_manifest_uid, many_to_one):
    '''
    Used to establish joins between manifests by determining the foreign key to use in the join, i.e., a way
    for "our manifest" to reference a "foreign manifest".

    Specifically, it assumes a link exists in this LinkTable between one of our manifest's UIDs and one of
    the foreign manifest's, and finds and returns the foreign manifest's UID that our UID is linked to.

    @param many_to_one A boolean. If True, it means that multiple rows of our manifest correspond to the same
                row of the foreign manifest, and only the first such row would have displayed the foreign
                UID. That triggers a need to "search" for earlier row numbers.
    '''
    row_nb = self.row_from_uid(parent_trace=parent_trace,
                               manifest_identifier=our_manifest_id,
                               uid=our_manifest_uid)
    if row_nb == None:
        raise ApodeixiError(parent_trace,
                            "Can't find foreign uid because our manifest uid does not appear to be in any row",
                            data={"our_manifest_id": str(our_manifest_id),
                                  "our_manifest_uid": str(our_manifest_uid),
                                  "foreign_manifest_id": str(foreign_manifest_id)})
    if many_to_one == False:
        # Search only in row_nb
        foreign_uid = self.uid_from_row(parent_trace=parent_trace,
                                        manifest_identifier=foreign_manifest_id,
                                        row_number=row_nb)
    else:
        # Search first in row_nb, and if nothing is found keep looking in earlier rows
        foreign_uid = None
        for current_row in reversed(range(row_nb + 1)):
            foreign_uid = self.uid_from_row(parent_trace=parent_trace,
                                            manifest_identifier=foreign_manifest_id,
                                            row_number=current_row)
            if foreign_uid != None:
                break
    return foreign_uid
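# Illustrative many_to_one case (all identifiers hypothetical): suppose rows 0..2 of our manifest hold
# UIDs BR1.B1, BR1.B2, BR1.B3, all joined to the same foreign row, whose UID I1 was only displayed on
# row 0. A call with our_manifest_uid='BR1.B3' resolves to row 2, finds no foreign UID there or on
# row 1, and returns 'I1' from row 0.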
def without_comments_in_parenthesis(self, parent_trace, txt_or_tuple):
    '''
    Returns a modified version of `txt_or_tuple`.

    If `txt_or_tuple` is a string, then it returns a substring of `txt_or_tuple`, ignoring any sub-text
    within `txt_or_tuple` that is in parenthesis. It also strips any leading or trailing spaces.

    For example, if txt is 'Effort (man days) to deliver', then this function returns 'Effort to deliver'.

    If `txt_or_tuple` is a tuple, then it applies the same logic to each string in the tuple, and returns
    the tuple.
    '''
    def _strip_parenthesis(parent_trace, txt):
        if type(txt) != str:
            raise ApodeixiError(parent_trace,
                                "Encountered problem removing comments in parenthesis: expected a string, "
                                + "but instead was given a '" + str(type(txt)) + "'",
                                data={"invalid input": str(txt)})
        stripped_txt = StringUtils().strip(txt)
        # Remove text within parenthesis, if any, using the natural language tool nltk.tokenize.SExprTokenizer
        sexpr = SExprTokenizer(strict=False)
        sexpr_tokens = sexpr.tokenize(stripped_txt)
        parenthesis_free_tokens = [t for t in sexpr_tokens if not ')' in t and not '(' in t]
        parenthesis_free_txt = ' '.join(parenthesis_free_tokens)
        return parenthesis_free_txt

    if type(txt_or_tuple) == str:
        return _strip_parenthesis(parent_trace, txt_or_tuple)
    elif type(txt_or_tuple) == tuple:  # This happens when there is a MultiLevel index for the columns
        result_list = [_strip_parenthesis(parent_trace, txt) for txt in txt_or_tuple]
        return tuple(result_list)
    else:
        raise ApodeixiError(parent_trace,
                            "Expected column header to be a string or tuple, not a '"
                            + str(type(txt_or_tuple)) + "'")
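# A minimal standalone sketch of the tokenization step above, runnable outside the class (assumes only
# that nltk is installed; the helper name is hypothetical). SExprTokenizer treats each parenthesized
# span as a single token, so dropping tokens containing '(' or ')' removes the comments:
from nltk.tokenize import SExprTokenizer

def _strip_parenthesis_sketch(txt):
    tokens = SExprTokenizer(strict=False).tokenize(txt.strip())
    return ' '.join(t for t in tokens if '(' not in t and ')' not in t)

# Expected: _strip_parenthesis_sketch('Effort (man days) to deliver') == 'Effort to deliver'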
def find_row_number(self, parent_trace, uid):
    '''
    Returns the row number (an int) associated with the uid. If no such link is already recorded,
    returns None.

    @param uid A string representing a UID. Example: "P3.C5"
    '''
    if type(uid) != str:
        raise ApodeixiError(parent_trace,
                            "Can't retrieve a row number for a uid that is not a string",
                            data={"type(uid)": str(type(uid))})
    # There is some nuance to the search. Consider the case where the Product domain object needs to link to
    # the Line-of-Business domain object. A product might have a UID like "P1", which is what would be passed
    # to this method as the `uid` parameter.
    # In the simple case, self.uid_2_row might look like:
    #
    #       {'P1': 0, 'P2': 7, ...}
    #
    # so we would just return 0 as the row number.
    #
    # However, it is possible that a product has sub-products, in which case self.uid_2_row might look like
    #
    #       {'P1.SP1': 0, 'P1.SP2': 1, 'P2': 7, ...}
    #
    # So nothing would be found under key 'P1', and we might inadvertently return a null row number that is
    # both incorrect and causes other exceptions downstream.
    # So in the algorithm used here, we look at all keys that are either "P1" or start with "P1.", take the
    # one for which the row number is smallest, and return that.
    #
    candidate_keys = [key for key in self.uid_2_row.keys()
                      if key == uid or key.startswith(uid + ".")]
    if len(candidate_keys) == 0:
        return None
    else:
        row_number = min([self.uid_2_row[key] for key in candidate_keys])
        return row_number
def unabbreviate_uid(self, parent_trace, uid, acronym_schema):
    '''
    Returns a possibly modified UID. For example, a UID like "P4.3" might be replaced by "P4.C3".

    In other words, if the uid is one of those "abbreviated UIDs" that lack acronyms (they arise for
    usability reasons in user-provided UIDs), attempt to infer the acronyms that are missing and return the
    full UID ("P4.C3" in the example).

    If a UID skipped an entity, this method relies on a 0 digit to detect that. For example, if the acronym
    schema is logically like [A, I, SI, AS], and a full UID is A4.I3.AS2, then the SI entity was skipped.
    In that case, the correct abbreviated UID should be A4.3.0.2, instead of A4.3.2. That makes it possible
    for this method to line up 1-1 the tokens of the abbreviated UID with the acronym schema, to infer the
    correct unabbreviated UID. In the example, that would be inferring that A4.3.0.2 corresponds to
    A4.I3.AS2. Without the "0" digit, if we had A4.3.2, we would have incorrectly inferred A4.I3.I2.

    @param acronym_schema Used to determine what acronyms to use in the full UID that is returned.
    '''
    # Path of the acronyms the store knows about so far. May not yet include the entity, if we
    # are adding a uid for that entity for the first time
    acronym_list = [acronyminfo.acronym for acronyminfo in acronym_schema.acronym_infos()]
    # Calling self.tokenize produces "unabbreviated" tokens
    tokens = UID_Utils().tokenize(parent_trace, uid, acronym_list)
    if len(tokens) == 0:
        raise ApodeixiError(parent_trace,
                            "Unable to parse and unabbreviate uid '" + str(uid) + "'")
    full_uid = ".".join(tokens)
    # Due to the possibility that the end user skipped some entities, we need to pad the UID before
    # returning it. That is because in the current implementation our call to self.tokenize wiped out
    # the padding that might have existed in the abbreviated uid
    padded_uid = acronym_schema.pad_uid(parent_trace, full_uid)
    return padded_uid
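# Expected behavior (illustrative, schemas as in the docstring examples):
#   unabbreviate_uid(trace, "P4.3", schema)      -> "P4.C3"
#   unabbreviate_uid(trace, "A4.3.0.2", schema)  -> "A4.I3.AS2"   (the 0 marks the skipped SI entity)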
def manifestLabelsFromCoords(self, parent_trace, subnamespace, coords):
    '''
    Helper method that returns a dict whose keys are the label field names that should be populated inside
    a manifest based on the parameters, and whose values are what the value should be for each label.

    Usually used in the context of generating forms.

    Example: consider a manifest name like "modernization.dec-2020.fusionopus.default" in namespace
    "my-corp.production", that arose from a posting for product "Fusion Opus", scoring cycle "Dec 2020"
    and scenario "Default". Then this method returns a dict with the values "modernization", "Dec 2020",
    "Fusion Opus" and "Default".

    @param subnamespace A string, which is allowed to be None. If not null, this is a further partitioning
                of the namespace into finer slices, and a manifest's name is supposed to identify the slice
                in which the manifest resides.
    @param coords A FilingCoords object corresponding to this controller. It is used, possibly along with
                the `subnamespace` parameter, to build a manifest name.
    '''
    if not type(coords) == InitiativesFilingCoordinates:
        raise ApodeixiError(parent_trace,
                            "Can't build manifest name because received wrong type of filing coordinates",
                            data={"Type of coords received": str(type(coords)),
                                  "Expected type of coords": "InitiativesFilingCoordinates"})
    workstream_UID = coords.workstream_UID
    initiative = subnamespace
    scenario = coords.scenario
    scoring_cycle = coords.scoring_cycle

    MY_PL = Workstream_Controller._MyPostingLabel  # Abbreviation for readability
    result_dict = {}
    result_dict[MY_PL._WORKSTREAM_UID] = workstream_UID
    result_dict[MY_PL._INITIATIVE] = initiative
    result_dict[MY_PL._SCENARIO] = scenario
    result_dict[MY_PL._SCORING_CYCLE] = scoring_cycle
    return result_dict
def parse_sandbox_announcement(self, parent_trace, announcement):
    '''
    Validates that the `announcement` is of the form "Using sandbox '210821.142725_sandbox'", and if so
    it returns the name of the sandbox, which in the example is '210821.142725_sandbox'.

    If `announcement` does not conform with the expected form, this method raises an ApodeixiError.
    '''
    ME = CLI_Utils
    if len(announcement) != (len(ME.PREFIX_EXPECTED) + ME.SANDBOX_NAME_LENGTH + len(ME.SUFFIX_EXPECTED)) \
            or not announcement.startswith(ME.PREFIX_EXPECTED) \
            or not announcement.endswith(ME.SUFFIX_EXPECTED):
        raise ApodeixiError(parent_trace, "Announcement is not in the expected form",
                            data={"announcement": announcement,
                                  "expected": self.sandox_announcement("<sandbox name>")})
    sandbox_name_start_idx = len(ME.PREFIX_EXPECTED)
    sandbox_name_end_idx = sandbox_name_start_idx + ME.SANDBOX_NAME_LENGTH
    sandbox_name = announcement[sandbox_name_start_idx:sandbox_name_end_idx]
    return sandbox_name
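# A standalone sketch of the slicing above. The constants are hypothetical stand-ins for the CLI_Utils
# values, and the sketch returns None where the real method raises ApodeixiError:
_PREFIX = "Using sandbox '"
_SUFFIX = "'"
_NAME_LEN = len("210821.142725_sandbox")

def _parse_announcement_sketch(announcement):
    if (len(announcement) != len(_PREFIX) + _NAME_LEN + len(_SUFFIX)
            or not announcement.startswith(_PREFIX)
            or not announcement.endswith(_SUFFIX)):
        return None
    return announcement[len(_PREFIX):len(_PREFIX) + _NAME_LEN]

# Expected: _parse_announcement_sketch("Using sandbox '210821.142725_sandbox'") == '210821.142725_sandbox'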
def is_sublist(self, parent_trace, super_list, alleged_sub_list):
    '''
    Checks if `alleged_sub_list` is a sublist of `super_list`.

    Returns a boolean to state if it is a sublist, as well as two lists: pre_list and post_list, that are
    "split" by the `alleged_sub_list`. If the boolean is True then the following will hold true:

        super_list == pre_list + alleged_sub_list + post_list

    If on the other hand the boolean is False, then both `pre_list` and `post_list` are None.

    If either the super_list or the alleged_sub_list is empty, then it returns False.
    '''
    if type(super_list) != list or type(alleged_sub_list) != list:
        raise ApodeixiError(parent_trace,
                            "Can't determine if we have a sub list because was given wrong types, not lists",
                            data={'type of super_list': str(type(super_list)),
                                  'type of alleged_sub_list': str(type(alleged_sub_list))})
    if len(super_list) == 0 or len(alleged_sub_list) == 0:
        return False, None, None

    # Get the indices in super_list for the first element of alleged_sub_list that leave enough room for
    # the alleged_sub_list to fit after that. These are candidate locations for a split
    sub_length = len(alleged_sub_list)
    candidate_idxs = [idx for idx, x in enumerate(super_list)
                      if x == alleged_sub_list[0] and len(super_list[idx:]) >= sub_length]

    # Now see if any of the candidate split locations work
    for idx in candidate_idxs:
        if alleged_sub_list == super_list[idx:idx + sub_length]:
            # Found a match!
            pre_list = super_list[:idx]
            post_list = super_list[idx + sub_length:]
            return True, pre_list, post_list

    # If we get this far, there is no match
    return False, None, None
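# A minimal standalone sketch of the same split logic, without tracing or type validation, so the
# candidate-index search can be exercised in isolation (the function name is hypothetical):
def _is_sublist_sketch(super_list, alleged_sub_list):
    if len(super_list) == 0 or len(alleged_sub_list) == 0:
        return False, None, None
    sub_length = len(alleged_sub_list)
    # Candidate split points: occurrences of the first element with enough room left for the sublist
    candidate_idxs = [idx for idx, x in enumerate(super_list)
                      if x == alleged_sub_list[0] and len(super_list[idx:]) >= sub_length]
    for idx in candidate_idxs:
        if alleged_sub_list == super_list[idx:idx + sub_length]:
            return True, super_list[:idx], super_list[idx + sub_length:]
    return False, None, None

# Expected behavior:
#   _is_sublist_sketch([1, 2, 3, 4, 5], [3, 4])   -> (True, [1, 2], [5])
#   _is_sublist_sketch([1, 2, 3, 4, 5], [3, 5])   -> (False, None, None)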
def generateNextUID(self, parent_trace, branch, acronym):
    '''
    @param branch A list of pairs that specify a branch in the _TokenTree. For example:
                [['P', 12], ['AC', 3], ['E', 45]]. If the acronym is 'W', it will add a node
                [['W', 5]], say, if P12.AC3.E45.W1 through W4 already exist.

    Returns two uids: a full UID P12.AC3.E45.W5 and the leaf UID W5.
    '''
    # Validate acronym is valid
    REGEX = '^([a-zA-Z]+)$'
    m = _re.match(REGEX, acronym)
    if m == None or len(m.groups()) != 1:
        raise ApodeixiError(parent_trace,
                            "Invalid acronym='" + acronym + "': expected something like 'P' or 'AV'. "
                            + "Level=" + str(self.level))
    if len(branch) == 0:
        # We hit bottom
        leaf_uid = self._generateHere(parent_trace, acronym)
        full_uid = leaf_uid
    else:
        head = branch[0]
        tail = branch[1:]
        # See GOTCHA comment in self.generateNextUID. It might be that we have received UIDs like
        # "BR1.SR0.TR1", and the "SR0" token is there only to enforce that TR is a level below SR, so that
        # the two are never in competition (which creates bugs). So if we have received a head like
        # "SR0", we shouldn't error out - just ignore it for purposes of walking down the tree, but
        # include it in the full_uid we return
        head_acronym, head_val = UID_Utils().parseToken(parent_trace, token=head)
        if head_val == 0:
            next_tree = self
        else:
            next_tree = self._findChild(parent_trace, head)
        tail_uid, leaf_uid = next_tree.generateNextUID(parent_trace, tail, acronym)
        full_uid = head + '.' + tail_uid
    return full_uid, leaf_uid
def inferFilingCoords(self, parent_trace, posting_label):
    '''
    After a PostingLabel is read with self as the posting handle, we can read from the Posting Label what
    the "real" FilingCoords should have been, and the caller can use this method to get such "inferred"
    FilingCoordinates and replace self (a "TBD" FilingCoords) with the "real" one.
    '''
    my_trace = parent_trace.doing("Looking up filing class for given posting API",
                                  data={'posting_api': self._posting_api})
    filing_class = posting_label.controller.store.getFilingClass(parent_trace, self._posting_api)
    if filing_class == None:
        raise ApodeixiError(my_trace, "Can't build filing coordinates from a null filing class")
    my_trace = parent_trace.doing("Validating that posting is in the right folder structure "
                                  + "within the Knowledge Base")
    filing_coords = filing_class().infer_from_label(parent_trace=my_trace, posting_label=posting_label)
    self._inferred_coords = filing_coords
def _filename_2_api(self, parent_trace, filename):
    '''
    Helper method that can be used by derived classes to infer the posting api from a filename.

    Returns a string: the posting api. Raises an ApodeixiError if none of the store's supported apis
    matches the filename.
    '''
    posting_api = None
    supported_apis = self.supported_apis(parent_trace=parent_trace)
    for api in supported_apis:
        if filename.endswith(api + ".xlsx"):
            posting_api = api
            break
    if posting_api == None:
        raise ApodeixiError(parent_trace,
                            "Filename is not for an API supported by the Knowledge Base store",
                            data={'filename': filename,
                                  'supported apis': str(supported_apis)})
    return posting_api
def _posting_testing_skeleton(self, store, test_case_name, excel_file):
    try:
        root_trace = FunctionalTrace(parent_trace=None, path_mask=self._path_mask).doing(
                                "Posting excel file",
                                data={'excel_file': excel_file},
                                origination={'signaled_from': __file__,
                                             'concrete class': str(self.__class__.__name__)})
        kbase = KnowledgeBase(root_trace, store, a6i_config=self.a6i_config)
        response, log_txt = kbase.postByFile(parent_trace=root_trace,
                                             path_of_file_being_posted=excel_file,
                                             excel_sheet="Sheet1")
        NB_MANIFESTS_EXPECTED = 3
        if len(response.createdManifests()) != NB_MANIFESTS_EXPECTED:
            raise ApodeixiError(root_trace,
                                'Expected ' + str(NB_MANIFESTS_EXPECTED) + ' manifests, but found '
                                + str(len(response.createdManifests())))

        # Retrieve the manifests created
        for handle in response.createdManifests():
            loop_trace = root_trace.doing("Retrieving manifest for handle " + str(handle),
                                          origination={'concrete class': str(self.__class__.__name__),
                                                       'signaled_from': __file__})
            manifest_dict, manifest_path = store.retrieveManifest(loop_trace, handle)
            self._compare_to_expected_yaml(loop_trace, manifest_dict, test_case_name + "." + handle.kind)
        return
    except ApodeixiError as ex:
        print(ex.trace_message())
        self.assertTrue(1 == 2)

    # If we get this far, the test failed since we should have returned within the try statement.
    # So hardcode an informative failure.
    self.assertTrue("Shouldn't have gotten to this line" == 0)
def get_environment_filter(self, parent_trace, kb_session, filter_type, sandbox):
    '''
    Returns a lambda that can be used as a filter for environments, whenever searching for objects across
    the KnowledgeBaseStore.

    @param filter_type A string. Must be one of:
        * CLI_Utils.ONLY_BASE_ENV_FILTER
        * CLI_Utils.ANY_ENV_FILTER
        * CLI_Utils.SPECIFIC_SANDBOX_ENV_FILTER

    @param sandbox A string, possibly null, corresponding to the name of a sandbox environment. It is only
        relevant for filter_type=CLI_Utils.SPECIFIC_SANDBOX_ENV_FILTER, to denote the sandbox that is
        allowed.
    '''
    # Define the possible environment filters
    def _only_base_env_filter(env_name):
        if env_name == kb_session.store.base_environment(parent_trace).name(parent_trace):
            return True
        return False

    def _any_env_filter(env_name):
        return True

    def _specific_sandbox_env_filter(env_name):
        if env_name == sandbox:
            return True
        return False

    if filter_type == CLI_Utils.ONLY_BASE_ENV_FILTER:
        return _only_base_env_filter
    elif filter_type == CLI_Utils.ANY_ENV_FILTER:
        return _any_env_filter
    elif filter_type == CLI_Utils.SPECIFIC_SANDBOX_ENV_FILTER:
        return _specific_sandbox_env_filter
    else:
        raise ApodeixiError(parent_trace,
                            "Unknown filter type '" + str(filter_type) + "'",
                            data={"allowed filter types": str([CLI_Utils.ONLY_BASE_ENV_FILTER,
                                                               CLI_Utils.ANY_ENV_FILTER,
                                                               CLI_Utils.SPECIFIC_SANDBOX_ENV_FILTER])})
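# Hypothetical usage sketch (the kb_session and sandbox name are illustrative):
#
#   env_filter = cli_utils.get_environment_filter(root_trace, kb_session,
#                                                 filter_type=CLI_Utils.SPECIFIC_SANDBOX_ENV_FILTER,
#                                                 sandbox='210821.142725_sandbox')
#   env_filter('210821.142725_sandbox')   # True
#   env_filter('some_other_sandbox')      # False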
def path_tokens(self, parent_trace):
    '''
    Returns a list of strings, corresponding to the path tokens implied by this FilingCoordinates instance.
    '''
    if self.scoringCycle == None or self.initiative == None or self.workstream_UID == None:
        raise ApodeixiError(parent_trace,
                            "Can't provide path_tokens because InitiativesFilingCoordinates is not "
                            + "fully built",
                            data={"scoringCycle": self.scoringCycle,
                                  "initiative": self.initiative,
                                  "workstream_UID": self.workstream_UID,
                                  "scenario": self.scenario})
    # Note: we save to a path like
    #
    #       .../excel-postings/initiatives/FY 22/s1.w0/w0-workstream.initiatives.a6i.xlsx
    #
    # as opposed to one like
    #
    #       .../excel-postings/initiatives/FY 22/s1/w0/w0-workstream.initiatives.a6i.xlsx
    #
    # hence the need to assemble the "mixed field" that in the example would be "s1.w0"
    mixed_field = self.initiative + "." + self.workstream_UID
    if self.scenario != None:
        result = [InitiativesFilingCoordinates.INITIATIVES, self.scoringCycle, mixed_field, self.scenario]
    else:
        result = [InitiativesFilingCoordinates.INITIATIVES, self.scoringCycle, mixed_field]
    return result
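# Illustrative: with scoringCycle='FY 22', initiative='s1', workstream_UID='w0' and scenario=None,
# this returns ['initiatives', 'FY 22', 's1.w0'] (assuming INITIATIVES == 'initiatives'), matching
# the folder layout in the comment above.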
def row_from_uid(self, parent_trace, manifest_identifier, uid):
    '''
    This is the inverse function to uid_from_row. It finds and returns the unique dataframe row number for
    the row that contains the given uid as its last UID.

    If we think of the DataFrame rows as branches in a tree, then this returns the branch number given the
    UID of the branch's leaf node.
    '''
    if not manifest_identifier in self.links_dict.keys():
        raise ApodeixiError(parent_trace,
                            "Can't retrieve row number from UID because manifest has no links "
                            + "associated with it",
                            data={"manifest_identifier": str(manifest_identifier),
                                  "uid": str(uid)})
    links = self.links_dict[manifest_identifier]
    row_number = links.find_row_number(parent_trace, uid)
    return row_number
def preflightPostingValidation(self, parent_trace, posted_content_df):
    '''
    Method performs some initial validation of `posted_content_df`, which is intended to be a DataFrame
    representation of the data posted in Excel.

    The intention for this preflight validation is to provide the user with more user-friendly error
    messages that educate the user on what he/she should change in the posting for it to be valid. In the
    absence of this preflight validation, the posting error from the user would eventually be caught deeper
    in the parsing logic, by which time the error generated might not be too user friendly.

    Thus this method is not so much to avoid corruption of the data, since downstream logic will prevent
    corruption anyway. Rather, it is to provide usability by outputting high-level user-meaningful error
    messages.
    '''
    ME = Mock_Controller._AccountPropertiesConfig
    posted_cols = list(posted_content_df.columns)
    mandatory_cols = [ME._ENTITY_NAME]
    mandatory_cols.extend(ME._SPLITTING_COLUMNS)
    missing_cols = [col for col in mandatory_cols if not col in posted_cols]
    if len(missing_cols) > 0:
        raise ApodeixiError(parent_trace,
                            "Posting lacks some mandatory columns. This often happens if "
                            + "ranges are wrong in Posting Label.",
                            data={'Missing columns': missing_cols,
                                  'Posted columns': posted_cols})
def buildIntervals(self, parent_trace, linear_space):
    '''
    Builds and returns a list with a single Interval spanning the entire `linear_space`, after validating
    that all mandatory columns are present.
    '''
    # Overwrite self.entity_name to be consistent with the linear space given
    self.entity_name = IntervalUtils().infer_first_entity(parent_trace, linear_space)

    my_trace = parent_trace.doing("Validating mandatory columns are present")
    missing_cols = [col for col in self.mandatory_columns if not col in linear_space]
    if len(missing_cols) > 0:
        raise ApodeixiError(my_trace,
                            "Posting lacks some mandatory columns",
                            data={'Missing columns': missing_cols,
                                  'Posted columns': linear_space})
    return [Interval(parent_trace, linear_space, self.entity_name)]
def add_known_uid(self, parent_trace, uid, last_acronym=None):
    '''
    Records that the `uid` is already used, and therefore no generated UID should be like it.

    Use with caution: normally this method should not be needed, since a user's posting should normally
    include a UID that was generated previously during processing of an earlier posting that gave rise to a
    manifest being persisted. That manifest would have the UID, and if the user makes an update to the
    posting, the update controller logic would call the method `initializeFromManifest` on the UID Store to
    seed the UID Store with such previously generated UIDs.

    So it is rare that we need to *forcefully* tell the UID store that a UID is already reserved, and it
    happens mainly in internal Apodeixi low-level code, not in application code.

    @param uid A string such as "JTBD1.C1.F1.S1"
    @param last_acronym A string for the entity of the leaf UID. In the example "JTBD1.C1.F1.S1", perhaps
            "S" stands for "Story", and "S" would be the last_acronym passed. The reason for needing this
            parameter is that for usability reasons the user may abbreviate the UID to something like
            "1.1.1.1", and the system needs to infer the acronyms. The UID Store would already know about
            the ancestors (JTBD, C, F) but might not yet know about the leaf UID acronym ("S"), which is
            therefore passed by the caller (the caller typically is the BreakdownTree class, which would
            know how to get such a "last acronym").
    '''
    if self.acronym_schema == None:
        raise ApodeixiError(parent_trace,
                            "Detected incorrectly built UID_Store while adding a known uid: this "
                            + "UID_Store's acronym schema is not initialized",
                            {"uid": str(uid)})
    known_acronym_list = [info.acronym for info in self.acronym_schema.acronym_infos()]
    self._mark_uid_as_used(parent_trace, uid, known_acronym_list, self.tree)
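# Illustrative call (uid and acronym taken from the docstring example; the store variable is hypothetical):
#
#   uid_store.add_known_uid(trace, "JTBD1.C1.F1.S1", last_acronym="S")
#
# After this, UIDs subsequently generated for the "S" entity under JTBD1.C1.F1 would not collide with S1.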