def test_warning_utils(self):
    '''
    Verifies that WarningUtils converts a caught warning into an ApodeixiError whose trace
    message matches the expected regression-test output.
    '''
    root_trace = FunctionalTrace(parent_trace=None,
                                 path_mask=self._path_mask).doing("Testing Warning Utils")
    try:
        scenario_name = 'test_warning_utils'
        step_trace = root_trace.doing("Testing a fake warning")
        with warnings.catch_warnings(record=True) as caught_warnings:
            WarningUtils().turn_traceback_on(step_trace, warnings_list=caught_warnings)

            # Deliberately trigger a warning so that handle_warnings below has something to turn
            # into an ApodeixiError
            warnings.warn("Test warning for Warning Utils", DeprecationWarning)

            WarningUtils().handle_warnings(step_trace, warning_list=caught_warnings)

        # The handling of the warning should raise an exception, so reaching this line means failure
        self.assertTrue(1 == 2)
    except ApodeixiError as ex:
        # The expected path: compare the error's trace message against the saved regression output
        self._compare_to_expected_txt(parent_trace=step_trace,
                                      output_txt=ex.trace_message(),
                                      test_output_name=scenario_name,
                                      save_output_txt=True)
def save(self, parent_trace, data_dict, path, use_cache=True):
    '''
    Saves the given `data_dict` as a YAML file at the given `path`.

    @param parent_trace A FunctionalTrace providing context in case an error or warning arises.
    @param data_dict A dictionary, to be persisted as YAML.
    @param path A string, the full filename to which to save the YAML content.
    @param use_cache A boolean. If True (the default), also records `data_dict` in the module-level
                    `_YAML_CACHE` under `path`, so that subsequent loads of the same path can skip
                    disk access.
    '''
    # As documented in https://nbconvert.readthedocs.io/en/latest/execute_api.html
    #
    # May get an error like this unless we explicity use UTF8 encoding:
    #
    #       File "C:\Alex\CodeImages\technos\anaconda3\envs\ea-journeys-env\lib\encodings\cp1252.py", line 19, in encode
    #       return codecs.charmap_encode(input,self.errors,encoding_table)[0]
    #       UnicodeEncodeError: 'charmap' codec can't encode character '\u2610' in position 61874: character maps to <undefined>
    #
    # Happens in particular when trying to save a string representing a Jupyter notebook's execution, since for the same
    # reason above that string had to be written to a string using UTF8 encoding, so now if we save to a file we must use UTF8
    with open(path, 'w', encoding="utf8") as file:
        # YAML invokes asyncio.base_events.py, that is noisy and issues spurious ResourceWarnings. So catch and suppress
        # such warnings. For other warnings, raise an ApodeixiError
        with warnings.catch_warnings(record=True) as w:
            WarningUtils().turn_traceback_on(parent_trace, warnings_list=w)

            _yaml.dump(data_dict, file)
            if use_cache:
                _YAML_CACHE[path] = data_dict

            WarningUtils().handle_warnings(parent_trace, warning_list=w)
def replicate_dataframe(self, parent_trace, seed_df, categories_list):
    '''
    Creates and returns a DataFrame by replicating the `seed_df` once per member of the
    `categories_list` and concatenating the copies horizontally. The resulting columns gain a new
    top level taken from `categories_list`.

    A usecase where this is used is to create templates for product-related manifests where similar
    content must exist per sub-product.

    Example: Suppose a product has subproducts ["Basic", "Premium"], given as `categories_list`,
    and `seed_df` holds estimates with columns [bigRock, FY 19, FY 20, FY 21]. Then the result has
    a two-level column index:

                    Basic                       |       Premium
            bigRock  FY 19  FY 20  FY 21        | bigRock  FY 19  FY 20  FY 21
            ====================================================================
        0   None     150    150    150            None     150    150    150
        ...

    @param categories_list A list of hashable objects, such as strings or ints
    '''
    with warnings.catch_warnings(record=True) as w:
        WarningUtils().turn_traceback_on(parent_trace, warnings_list=w)

        # One independent copy of the seed per category; the dict keys become the new
        # top level of the column index when concatenating
        replicas_by_category = {category: seed_df.copy() for category in categories_list}
        replicas_df = _pd.concat(replicas_by_category, axis=1)

        WarningUtils().handle_warnings(parent_trace, warning_list=w)
        return replicas_df
def load(self, parent_trace, path, use_cache=True):
    '''
    Returns a dictionary, corresponding to the loaded representation of the YAML file in the given `path`

    @param parent_trace A FunctionalTrace providing context in case an error or warning arises.
    @param path A string, the full filename of the YAML file to load.
    @param use_cache A boolean. If True (the default), returns the previously cached dictionary for
                    `path` when one exists, and records freshly loaded content in the cache.
    '''
    # Cheap cache hit check before touching the filesystem
    if use_cache and path in _YAML_CACHE:
        return _YAML_CACHE[path]
    try:
        with open(path, 'r', encoding="utf8") as file:
            # YAML invokes asyncio.base_events.py, that is noisy and issues spurious ResourceWarnings. So catch and suppress
            # such warnings. For other warnings, raise an ApodeixiError
            with warnings.catch_warnings(record=True) as w:
                WarningUtils().turn_traceback_on(parent_trace, warnings_list=w)

                loaded_dict = _yaml.load(file, Loader=_yaml.FullLoader)
                if use_cache:
                    _YAML_CACHE[path] = loaded_dict

                WarningUtils().handle_warnings(parent_trace, warning_list=w)
                return loaded_dict
    except Exception as ex:
        # NOTE(review): an ApodeixiError raised by handle_warnings above is also caught here and
        # re-wrapped into a generic "problem loading YAML" error - confirm that is intended.
        raise ApodeixiError(parent_trace, "Found a problem loading YAML file",
                            data={"path": str(path), "error": str(ex)})
def form(kb_session, posting_api, namespace, subnamespace, dry_run, environment, timestamp):
    '''
    Requests a form (an Excel spreadsheet) which (after some edits, as appropriate) can be used as the input
    to the post command.
    '''
    timer = ApodeixiTimer()
    func_trace = FunctionalTrace(parent_trace=None, path_mask=None)
    # NOTE(review): the trace message says "post" even though this is the `form` command - confirm intended
    root_trace = func_trace.doing("CLI call to post", origination={'signaled_from': __file__})

    # Tracks whether the KnowledgeBase call itself succeeded, so the error handlers below can
    # distinguish "operation failed" from "operation succeeded but describing the response failed"
    kb_operation_succeeded = False
    try:
        # Catch warnings and handle them so that we avoid spurious noise in the CLI due to noisy 3rd party libraries
        with warnings.catch_warnings(record=True) as w:
            WarningUtils().turn_traceback_on(root_trace, warnings_list=w)

            # Pin down the environment in which to run: an explicitly named environment, a freshly
            # provisioned sandbox (for dry runs), or otherwise the store's current environment
            if environment != None:
                kb_session.store.activate(parent_trace=root_trace, environment_name=environment)
                click.echo(CLI_Utils().sandox_announcement(environment))
            elif dry_run == True:
                sandbox_name = kb_session.provisionSandbox(root_trace)
                click.echo(CLI_Utils().sandox_announcement(sandbox_name))
            '''
            else:
                raise ApodeixiError(root_trace, "Sorry, only sandbox-isolated runs are supported at this time. Aborting.")
            '''
            # Now that we have pinned down the environment (sandbox or not) in which to call the KnowledgeBase's services,
            # set that environment's tag to use for KnoweldgeBase's posting logs, if the user set it.
            if timestamp:
                kb_session.store.current_environment(root_trace).config(
                    root_trace).use_timestamps = timestamp

            my_trace = root_trace.doing(
                "Invoking KnowledgeBase's requestForm service")

            # The blind form request is expressed relative to the store's client URL, anchored at
            # the user's current working directory
            output_dir = _os.getcwd()
            clientURL = kb_session.store.getClientURL(my_trace)
            relative_path, void = PathUtils().relativize(parent_trace=my_trace,
                                                         root_dir=clientURL,
                                                         full_path=output_dir)
            form_request = kb_session.store.getBlindFormRequest(
                parent_trace=my_trace,
                relative_path=relative_path,
                posting_api=posting_api,
                namespace=namespace,
                subnamespace=subnamespace)

            response, log_txt, rep = kb_session.kb.requestForm(
                parent_trace=my_trace, form_request=form_request)
            kb_operation_succeeded = True
            manifests_description = CLI_Utils().describe_req_form_response(
                my_trace,
                form_request_response=response,
                store=kb_session.store,
                representer=rep)

            click.echo(manifests_description)
            output = "Success"
            click.echo(output)
            click.echo(timer.elapsed_time_message())

            WarningUtils().handle_warnings(root_trace, warning_list=w)

    except ApodeixiError as ex:
        error_msg = CLI_ErrorReporting(kb_session).report_a6i_error(
            parent_trace=root_trace, a6i_error=ex)
        if kb_operation_succeeded:
            error_msg = "KnowledgeBase operation completed, but run into a problem when preparing "\
                        + "a description of the response:\n"\
                        + error_msg
        # GOTCHA
        #       Use print, not click.echo or click exception because they don't correctly display styling
        #       (colors, underlines, etc.). So use vanilla Python print and then exit
        print(error_msg)
        _sys.exit()
    except Exception as ex:
        try:
            error_msg = CLI_ErrorReporting(kb_session).report_generic_error(
                parent_trace=root_trace, generic_error=ex)
            if kb_operation_succeeded:
                error_msg = "KnowledgeBase operation completed, but run into a problem when preparing "\
                            + "a description of the response:\n"\
                            + error_msg
        except Exception as ex2:
            # Even the error reporting failed - fall back to a plain-text message covering both errors
            error_msg = "CLI run into trouble: found error:\n\n\t" + str(ex) + "\n\n" \
                        + "To make things worse, when trying to produce an error log file with a "\
                        + "stack trace, run into an additional error:\n\n\t" + str(ex2)
        # GOTCHA
        #       Use print, not click.echo or click exception because they don't correctly display styling
        #       (colors, underlines, etc.). So use vanilla Python print and then exit
        print(error_msg)
        _sys.exit()
def skeleton_test(self, parent_trace, cli_command_list, output_cleanining_lambda, when_to_check_environment=PER_COMMAND):
    '''
    Runs the given list of CLI commands in an isolated test environment and compares each command's
    (cleaned) output against the expected regression output.

    @param when_to_check_environment A string enum, that determines how frequently to check the contents of the
            environment as the CLI commands execulte. Possible values:

            * CLI_Test_Skeleton.PER_COMMAND
            * CLI_Test_Skeleton.ONLY_AT_END
            * CLI_Test_Skeleton.NEVER
    '''
    ME = CLI_Test_Skeleton
    try:
        my_trace = self.trace_environment(parent_trace, "Isolating test case")
        if self.provisioned_env_name == None:
            # This is the second time we provision the isolated environment, but now with a different context, i.e.,
            # different self.a6i_config and different self.test_config_dict than the first time we provisioned
            # an isolated environment, which was in self.setUp. See comments there. The environment provisioned
            # here is a child of the one configured in self.setUp, and is fo
            self.provisionIsolatedEnvironment(my_trace)
            if when_to_check_environment == ME.PER_COMMAND:
                self.check_environment_contents(my_trace)
            # Remember the environment so that subsequent calls re-activate it instead of provisioning again
            self.provisioned_env_name = self.stack().store(
            ).current_environment(my_trace).name(my_trace)
        else:
            self.stack().store().activate(my_trace, self.provisioned_env_name)

        my_trace = self.trace_environment(
            parent_trace,
            "Invoking " + str(len(cli_command_list)) + " commands")
        if True:
            runner = CliRunner()
            for raw_command_argv in cli_command_list:
                # The raw_command_arv might include some lambdas that need to be evaluated not in order to
                # determine the real argv to pass to the CLI. The reason there might be lambdas is that some
                # parameters for some commands can only be determined after earlier commands are run, so they
                # aren't known when the command list was defined, and only now that we have run prior commands
                # can it be determined.
                #   Example:
                #       The sandbox to use, if flag "--sandbox" is on. That can only be known after
                #       self.sandbox is set, which happens when the first command runs.
                def _unraw_param(param):
                    # Evaluates a "delayed parameter" (a callable) to its runtime value; otherwise
                    # normalizes the parameter to a string
                    if callable(param):
                        return param()
                    else:
                        # As a precaution, make sure we return a string. Otherwise, if param is an int,
                        # click will later through some exception
                        return str(param)

                # Note: two operations are being done here:
                #
                # 1) Replacing a "delayed parameter": a parameter that couldn't be given when the caller's code was
                #    written, but can at runtime, so the "delayed parameter" is a callable that, if called, would return
                #    the actual parameter to use. Example: the sandbox parameter, which is determined in the first
                #    post of the script and must be passed to all subsequent post commands so they continue the work
                #    in a common sandbox.
                # 2) Filtering out nulls. That is a trick to enable the caller, for example, to use the same script
                #    for both dry runs and live runs. All the caller has to do is set the "--sandbox <sandbox>" to a
                #    value when using the script with a sandbox, and to None when doing it live.
                command_argv = [
                    _unraw_param(param) for param in raw_command_argv
                    if param != None
                ]

                loop_trace = self.trace_environment(
                    my_trace,
                    "Executing '" + " ".join([str(cmd) for cmd in command_argv]) + "'")

                # Some Python libraries can be too noisy with warnings, and these get printed out to standard err/output
                # where the CLI will regard as "part of output" and display them in regression test output. This makes
                # regression output both ugly and sometimes non-deterministc.
                # To remedy this, we change the warning context to catch all warnings and based on what we catch, either
                #   1. raise an ApodeixiError so that the Apodeixi developer can change the code construct that led to the
                #      warning, possible as the ApodeixiError will include a stack trace to pin point where in the Apodeixi
                #      code the warning was triggered,
                #   2. or ignore the warning if that is pure noise and no code change in Apodeixi could prevent it from being
                #      triggered
                #
                with warnings.catch_warnings(record=True) as w:
                    WarningUtils().turn_traceback_on(parent_trace, warnings_list=w)

                    result = runner.invoke(self.cli, command_argv)

                    WarningUtils().handle_warnings(parent_trace, warning_list=w)

                if result.exit_code != 0:
                    raise ApodeixiError(loop_trace,
                                        "CLI command failed",
                                        data={
                                            "CLI exit code": str(result.exit_code),
                                            "CLI exception": str(result.exc_info),
                                            "CLI output": str(result.output),
                                            "CLI traceback": str(result.exc_info)
                                        })

                sandbox = CLI_Utils().infer_sandbox_name(
                    loop_trace, result.output)
                if sandbox != None:
                    # We only overwrite self.sandbox if this particular command chose a sandbox. Otherwise
                    # we retain whatever self.sandbox was set by prior commands. This is important since some commands
                    # don't have a --sandbox option (Example: get namespaces), but that does not mean that
                    # our intention is to switch out of the sandbox and into the parent environment.
                    self.sandbox = sandbox

                # Build a deterministic, display-friendly rendering of the command: verb (and
                # sub-verb) plus flags, without volatile argument values like full paths
                command_flags = [
                    token for token in command_argv if token.startswith("--")
                ]
                if command_argv[0] in ["post"]:
                    argv_without_arguments = command_argv[:1]
                elif command_argv[0] in ["get"]:
                    argv_without_arguments = command_argv[:2]
                elif command_argv[0] in ["import"]:
                    argv_without_arguments = command_argv[:2]
                elif command_argv[0] in ["diff"]:
                    argv_without_arguments = command_argv[:1]
                else:
                    raise ApodeixiError(
                        my_trace,
                        "Command not recognized: '" + str(command_argv[0]) + "'")

                argv_without_arguments.extend(
                    command_flags)  # Like post --dry-run

                # Once we are done building it, command_without_flag_params will be something like
                #
                #       => post --dry-run products.static-data.admin.a6i.xlsx
                #
                #   or
                #
                #       => post --sandbox products.static-data.admin.a6i.xlsx
                #
                # hence it will be suitable for inclusion in deterministic output. For example, we remove
                # timestamp-sensitive sandbox names (if any) and also the full path for the posted file.
                command_without_flag_params = " ".join(
                    argv_without_arguments)
                if command_argv[0] in ["post"] or command_argv[:2] in [[
                        "get", "form"
                ]]:
                    # These are commands with a unique argument. Other commands lack it
                    path_posted = command_argv[-1]
                    unique_argument = _os.path.split(path_posted)[1]
                    command_without_flag_params += " " + unique_argument
                elif command_argv[:2] in [["import", "aha"]]:
                    args = command_argv[-4:]
                    command_without_flag_params += " " + " ".join(args)

                output_to_display = "=> " + command_without_flag_params + "\n\n"

                if output_cleanining_lambda == None:
                    output_to_display += result.output
                else:
                    output_to_display += output_cleanining_lambda(
                        result.output)

                self.check_cli_output(
                    parent_trace=loop_trace,
                    cli_output=output_to_display,
                    cli_command=" ".join(
                        argv_without_arguments)  # Like post --dry-run
                )

                if when_to_check_environment == ME.PER_COMMAND:
                    self._check_CLI_environment(loop_trace)

        if when_to_check_environment == ME.ONLY_AT_END:
            # We display the consolidated effect of all commands in the script onto the KnowledgeBase used by the CLI
            self._check_CLI_environment(my_trace)

        my_trace = self.trace_environment(parent_trace, "Deactivating environment")
        self.stack().store().deactivate(my_trace)
    except ApodeixiError as ex:
        # Surface the full trace message and then fail the test deliberately
        click.echo(ex.trace_message())
        self.assertTrue(1 == 2)
def run(self, parent_trace):
    '''
    Loads the notebook at self.src_folder/self.src_filename, executes it, optionally saves the
    executed notebook to self.destination_folder/self.destination_filename (when both are set),
    and returns the executed notebook as a dictionary.

    @param parent_trace A FunctionalTrace providing context in case an error or warning arises.
    @raises ApodeixiError if the notebook can't be loaded, executed, or saved.
    '''
    # Catch warnings and handle them so that we avoid spurious noise in the CLI due to noisy 3rd party libraries
    with warnings.catch_warnings(record=True) as w:
        WarningUtils().turn_traceback_on(parent_trace, warnings_list=w)

        # As documented in https://nbconvert.readthedocs.io/en/latest/execute_api.html
        #
        # May get an error like this unless we explicity use UTF8 encoding:
        #
        #       File "C:\Alex\CodeImages\technos\anaconda3\envs\ea-journeys-env\lib\encodings\cp1252.py", line 19, in encode
        #       return codecs.charmap_encode(input,self.errors,encoding_table)[0]
        #       UnicodeEncodeError: 'charmap' codec can't encode character '\u2610' in position 61874: character maps to <undefined>
        #
        my_trace = parent_trace.doing("Attempting to load notebook")
        try:
            with open(self.src_folder + '/' + self.src_filename, encoding="utf8") as f:
                nb = _nbformat.read(f, as_version=4)
        except Exception as ex:
            # BUG FIX: ApodeixiError takes a FunctionalTrace as its first argument; it was missing here
            raise ApodeixiError(my_trace,
                                "Encountered this error while loading notebook: " + str(ex),
                                data={
                                    'src_folder': self.src_folder,
                                    'src_filename': self.src_filename
                                })

        my_trace = parent_trace.doing("Attempting to execute notebook")
        try:
            #ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
            ep = ExecutePreprocessor(
                timeout=600
            )  # Use virtual-env's kernel, so don't specify: kernel_name='python3'

            ep.preprocess(
                nb, {'metadata': {
                    'path': self.destination_folder + '/'
                }}
            )  # notebook executes in the directory specified by the 'path' metadata field
        except Exception as ex:
            raise ApodeixiError(my_trace,
                                "Encountered this error while executing notebook: " + str(ex),
                                data={
                                    'src_folder': self.src_folder,
                                    'src_filename': self.src_filename
                                })

        my_trace = parent_trace.doing("Attempting to save notebook")
        try:
            if self.destination_folder != None and self.destination_filename != None:
                with open(self.destination_folder + '/' + self.destination_filename,
                          'w', encoding='utf-8') as f:
                    _nbformat.write(nb, f)
        except Exception as ex:
            # BUG FIX: the trace argument was missing here too, and the message wrongly said
            # "executing notebook" for what is the save step
            raise ApodeixiError(my_trace,
                                "Encountered this error while saving notebook: " + str(ex),
                                data={
                                    'destination_folder': self.destination_folder,
                                    'destination_filename': self.destination_filename
                                })

        WarningUtils().handle_warnings(parent_trace, warning_list=w)

        my_trace = parent_trace.doing(
            "Converting notebook to dictionary after executing it")
        return NotebookUtils._val_to_dict(my_trace, nb)
def read(self, parent_trace):
    '''
    Loads the Apodeixi object in Excel that this ExcelTableReader was initialized for, and returns it as a
    Pandas DataFrame

    @param parent_trace A FunctionalTrace providing context in case an error or warning arises.
    @raises ApodeixiError if the Excel file can't be read, or the configured range spans no data.
    '''
    my_trace = parent_trace.doing("Parsing excel range",
                                  data={
                                      "excel_range": str(self.excel_range),
                                      "excel sheet": str(self.excel_sheet)
                                  })
    first_column, last_column, first_row, last_row = ExcelTableReader.parse_range(
        my_trace, self.excel_range)

    header_list, nrows = self.xlr_config.pandasRowParameters(
        parent_trace, first_row, last_row)
    if len(header_list) != self.xlr_config.nb_header_levels:
        raise ApodeixiError(
            my_trace,
            "Internal problem: inconsistency as to the number of headers expected in Excel",
            data={
                "excel_fullpath": str(self.excel_fullpath),
                "excel sheet": str(self.excel_sheet),
                "excel range": str(self.excel_range),
                "# headers inferred": str(len(header_list)),
                "# headers configured": str(self.xlr_config.nb_header_levels)
            })

    my_trace = parent_trace.doing("Loading Excel spreadsheet",
                                  data={
                                      "excel_fullpath": str(self.excel_fullpath),
                                      "excel sheet": str(self.excel_sheet)
                                  })
    try:
        # Pandas sometimes issues future warnings. By default, these are printed to stderr, which can mess up the
        # deterministic requirement for regression test output.
        # So we would rather have an exception be thrown so that we know of where in the Apodeixi code base a code
        # construct needs to be made future-proof. That is why we use the warnings context manager here
        with warnings.catch_warnings(record=True) as w:
            WarningUtils().turn_traceback_on(parent_trace, warnings_list=w)

            # We have two cases:
            #   1. header_list is a singleton - this is the "normal" case, and we can use the `usecols` parameter
            #      in Pandas read_excel
            #   2. header_list has multiple elements. This means we have a MultiLevel index in the DataFrame-to-be,
            #      and unfortunately Pandas disallows `usecols` in that case. That makes the logic more complicated,
            #      because after calling Pandas::read_excel we have to prune spurious columns that might have been
            #      picked up by Pandas::read_excel, since we couldn't tell it to just use columns in the range
            #      first_column:last_column
            #
            # Case #2 is more general than #1, so the logic for #2 could address #1 as well. However, since historically
            # Apodeixi only supported #1, we retain the simpler code for #1 as a defensive quality tactic: if for
            # some reason our implementation of #2 is buggy, we don't want that bug to affect the previously working
            # functionality for usecase #1
            # So we explicitly have an "if-else" statement for the two cases, even if in theory that's unnecessary
            #
            if len(header_list) == 1:
                df = _pd.read_excel(io=self.excel_fullpath,
                                    sheet_name=self.excel_sheet,
                                    header=header_list,
                                    usecols=first_column + ':' + last_column,
                                    nrows=nrows)
            elif len(header_list) > 1:
                # This is the MultiLevel index case, and can't use `usecols` in the call to Pandas::read_excel, so must
                # first call read_excel and after that prune the result to confine to the desired columns
                #
                raw_df = _pd.read_excel(io=self.excel_fullpath,
                                        sheet_name=self.excel_sheet,
                                        header=header_list,
                                        nrows=nrows)

                # We need to convert the letter columns to integers, before we can prune them. That requires a little
                # helper function inspired by
                # https://stackoverflow.com/questions/7261936/convert-an-excel-or-spreadsheet-column-letter-to-its-number-in-pythonic-fashion
                def _col2num(col):
                    '''
                    Converts Excel letter columns to ints, starting at 0
                    '''
                    num = 0
                    for c in col:
                        if c in _string.ascii_letters:
                            num = num * 26 + (ord(c.upper()) - ord('A')) + 1
                    return num - 1

                first_col_nb = _col2num(first_column)
                last_col_nb = _col2num(last_column)
                raw_columns = list(raw_df.columns)
                # Prune spurious columns on the left
                df = raw_df.drop(raw_columns[:first_col_nb], axis=1)
                # Prune columns on the right
                df = df.drop(raw_columns[last_col_nb + 1:], axis=1)
            else:
                raise ApodeixiError(
                    parent_trace,
                    "Can't load Excel file because no headers were specified",
                    data={
                        "path": str(self.excel_fullpath),
                        # BUG FIX: previously referenced self.sheet_name, an attribute that does not
                        # exist (the attribute used everywhere else is self.excel_sheet), so this
                        # error path would itself crash with an AttributeError
                        "sheet_name": str(self.excel_sheet),
                        "range": str(self.excel_range)
                    })

            WarningUtils().handle_warnings(parent_trace, warning_list=w)

    except PermissionError as ex:
        raise ApodeixiError(
            my_trace,
            "Was not allowed to access excel file. Perhaps you have it open?",
            data={
                "excel_fullpath": str(self.excel_fullpath),
                "excel sheet": str(self.excel_sheet),
                "error": str(ex)
            },
            origination={
                'concrete class': str(self.__class__.__name__),
                'signaled_from': __file__
            })
    except ValueError as ex:
        error_msg = str(ex)
        if error_msg.startswith(
                "Worksheet named '") and error_msg.endswith("' not found"):
            raise ApodeixiError(
                my_trace,
                "Are you missing the Posting Label, or perhaps you have a typo or "
                + "missing value in the Posting Label's 'data.sheet' fields? "
                + "\nGot this error:" + "\n\n" + error_msg)
        else:
            raise ApodeixiError(my_trace,
                                "Found an error while reading the Excel file",
                                data={'error': error_msg})
    except FileNotFoundError as ex:
        error_msg = str(ex)
        # NOTE(review): this "Worksheet named" check mirrors the ValueError branch above, but a
        # FileNotFoundError message is unlikely to match it - confirm whether this branch's
        # condition is intended
        if error_msg.startswith(
                "Worksheet named '") and error_msg.endswith("' not found"):
            raise ApodeixiError(
                my_trace,
                "Is your Posting Label right in the Excel spreadsheet? Got this error:"
                + "\n\n" + error_msg)
        else:
            raise ApodeixiError(my_trace,
                                "Found an error while reading the Excel file",
                                data={'error': error_msg})

    my_trace = parent_trace.doing(
        "Validating data loaded from Excel is not empty")
    if len(df.columns) == 0:
        raise ApodeixiError(
            my_trace,
            "Incorrectly formatted Excel range was given: '" + self.excel_range
            + "'. It spans no columns with data",
            data={
                "excel_fullpath": str(self.excel_fullpath),
                "excel sheet": str(self.excel_sheet)
            },
            origination={
                'concrete class': str(self.__class__.__name__),
                'signaled_from': __file__
            })
    if len(df.index) == 0:
        raise ApodeixiError(
            my_trace,
            "Incorrectly formatted Excel range was given: '" + self.excel_range
            + "'. It spans no rows with data",
            data={
                "excel_fullpath": str(self.excel_fullpath),
                "excel sheet": str(self.excel_sheet)
            },
            origination={
                'concrete class': str(self.__class__.__name__),
                'signaled_from': __file__
            })

    my_trace = parent_trace.doing(
        "Computing manifest DataFrame from raw DataFrame loaded from Excel")
    manifest_df = self.xlr_config.toManifestDF(parent_trace=my_trace,
                                               raw_df=df,
                                               first_row=first_row,
                                               last_row=last_row)
    return manifest_df