def test_output_generator(self, filename, record_num, record_name, expected_level, expected_text):
    """Generate a PowerPoint file from an OPML outline and check one record of it.

    The first entry returned by ``extract_data_nodes`` selects the data node,
    which is extracted with the ``dns`` specifier, written to
    ``ppt_output_02.pptx`` using ``ppt_template_02.pptx`` as the template, and
    then read back through ``get_slide_data``.

    Args:
        filename: Name of the OPML input file inside the test input folder.
        record_num: Index of the record yielded by ``get_slide_data`` to check.
        record_name: Label included in the failure message of the level check.
        expected_level: Expected first element of the selected record.
        expected_text: Expected second element of the selected record.
    """
    opml_path = os.path.join(input_files_root, test_file_folder_relative, filename)
    template_path = os.path.join(input_files_root, test_file_folder_relative, "ppt_template_02.pptx")
    generated_ppt_path = os.path.join(output_files_root, test_file_folder_relative, "ppt_output_02.pptx")

    unleashed = UnleashedOutline(
        Outline.from_opml(opml_path),
        default_text_tag_delimiter=['', ':'],
    )
    data_node_entries = unleashed.extract_data_nodes()
    all_nodes = unleashed.list_unleashed_nodes()
    node_position = data_node_entries[0]['data_node_list_index']
    root = all_nodes[node_position].node()

    specifier = DataNodeSpecifier(dns)
    data_table = specifier.extract_data_node_dispatch(root)
    PptOutputGeneratorSimple.generate_ppt(data_table, generated_ppt_path, template_path)

    slide_records = list(get_slide_data(generated_ppt_path))
    actual_level, actual_text = slide_records[record_num]

    self.assertEqual(expected_level, actual_level, f"Failed on {record_name}")
    self.assertEqual(expected_text, actual_text)
def test_data_node_extract_04(self, index, key1, key2, non_key1, non_key2, non_key3):
    """Check every field of one record extracted from the node at list index 31.

    Args:
        index: Position of the record to inspect within the extracted records.
        key1: Expected value of ``key_field_1``.
        key2: Expected value of ``key_field_2``.
        non_key1: Expected value of ``data_field_1``.
        non_key2: Expected value of ``data_field_2``.
        non_key3: Expected value of ``data_field_3``.
    """
    node_position = 31
    root = self.outline_node_list[node_position].node()
    specifier = DataNodeSpecifier(test_data_node_specifier_03x)
    record = specifier.extract_data_node_dispatch(root)[index]

    # Checked in this order so the first mismatch reported is the same as before.
    expected_by_field = (
        ('key_field_1', key1),
        ('key_field_2', key2),
        ('data_field_1', non_key1),
        ('data_field_2', non_key2),
        ('data_field_3', non_key3),
    )
    for field_name, expected_value in expected_by_field:
        self.assertEqual(expected_value, record[field_name])
def test_extract_all_field_names(self):
    """Check the names returned by ``extract_field_names(primary_key_only=None)``.

    Uses the first entry of ``data_node_specifier_test_driver`` as the specifier.
    """
    wanted = ['risk_description', 'likelihood', 'impact', 'mitigation']
    specifier = DataNodeSpecifier(data_node_specifier_test_driver[0])
    found = specifier.extract_field_names(primary_key_only=None)
    self.assertEqual(wanted, found)
def test_data_node_extract_05(self):
    """Run extraction on the node at list index 46 without checking the result.

    No assertion is made, so this only fails if building the specifier or the
    extraction call itself raises.
    """
    root = self.outline_node_list[46].node()
    specifier = DataNodeSpecifier(test_data_node_specifier_05x)
    # Return value deliberately ignored: there are no expectations to compare against.
    specifier.extract_data_node_dispatch(root)
def test_data_node_extract_03(self):
    """Check that extracting the node at list index 31 yields exactly 3 records."""
    root = self.outline_node_list[31].node()
    specifier = DataNodeSpecifier(test_data_node_specifier_03x)
    records = specifier.extract_data_node_dispatch(root)
    self.assertEqual(3, len(records))
def test_data_node_extract_01(self):
    """Check that extracting the node at list index 1 yields exactly 21 records.

    The ``test_data_node_specifier_ppt_01`` specifier drives the extraction.
    """
    wanted_count = 21
    root = self.outline_node_list[1].node()
    specifier = DataNodeSpecifier(test_data_node_specifier_ppt_01)
    records = specifier.extract_data_node_dispatch(root)
    self.assertEqual(wanted_count, len(records))
def test_data_node_freeform_notes(self):
    """Run the freeform-notes specifier on the node at list index 1.

    No assertion is made, so this only fails if building the specifier or the
    extraction call itself raises.
    """
    root = self.outline_node_list[1].node()
    specifier = DataNodeSpecifier(test_data_node_specifier_freeform_notes)
    # Return value deliberately ignored: there are no expectations to compare against.
    specifier.extract_data_node_dispatch(root)
def test_json_desc_version_number(self):
    """
    Tests ability to parse a file with heading levels mapped directly to
    outline level. So Outline Level 1 is Heading Level 1 etc.

    Also uses JSON syntax which doesn't specify all matching criteria - these
    should be assigned None in the JSON translation.

    The specifier is loaded from
    ``custom_json_test_descriptors_generic_levels.json`` and applied to the
    node at list index 1 of ``custom_json_test_descriptors.opml``; the test
    passes when 32 rows are extracted.

    :return: None
    """
    json_specifier_generic_levels = os.path.join(
        self.test_root, "custom_json_test_descriptors_generic_levels.json")
    opml_file_name = os.path.join(self.test_root, "custom_json_test_descriptors.opml")
    root_node_index = 1
    expected_number_of_rows = 32

    # Read json file into a string (later this will be done within the outline engine).
    # The encoding is given explicitly so that decoding does not depend on the
    # locale default of the machine running the tests.
    with open(json_specifier_generic_levels, 'r', encoding='utf-8') as f:
        json_specifier_string = f.read()

    # Read opml file into an outline
    outline = Outline.from_opml(opml_file_name)
    unleashed_outline = UnleashedOutline(outline)
    node_list = list(unleashed_outline.iter_unleashed_nodes())
    root_node = node_list[root_node_index].node()

    specifier = DataNodeSpecifier.from_json_string(json_specifier_string)
    extracted_data_nodes = specifier.extract_data_node_dispatch(root_node)

    self.assertEqual(expected_number_of_rows, len(extracted_data_nodes))
def test_match_data_node(self, specifier_index, data_node_index, expected_field_data):
    """Match one data node against one specifier and compare the matched items.

    Args:
        specifier_index: Index into ``data_node_specifier_test_driver`` of the
            specifier definition to use.
        data_node_index: Index into ``self.node_list`` of the node to match.
        expected_field_data: Expected return value of ``match_data_node``.
    """
    driver_entry = data_node_specifier_test_driver[specifier_index]
    specifier = DataNodeSpecifier(driver_entry)

    # Disabled: override of the text tag regex from the specifier header.
    # text_tag_override = specifier.dns_structure['header']['tag_delimiters']['text_delimiters']
    # data_node_ancestry_record = self.node_list[data_node_index]
    # data_node_ancestry_record.text_tag_regex = text_tag_override

    candidate = self.node_list[data_node_index].node()
    self.assertEqual(expected_field_data, specifier.match_data_node(candidate))
def test_output_generator_csv_01(self, opml_filename, output_filename, ignored_json_file, row, col, expected_result):
    """
    Creates a csv file from an outline using the in-memory data node
    specifier ``data_node_specifier_csv_test_01`` and checks the written file.

    NOTE: A JSON file name is passed in because the same test data generator
    is used across both csv test methods; this method ignores it.

    :param opml_filename: OPML input file name, looked up in the "output_generator" input folder.
    :param output_filename: csv file name, written to the "output_generator" output folder.
    :param ignored_json_file: Unused in this method.
    :param row: Row argument handed to ``CsvTestChecker.check``.
    :param col: Column argument handed to ``CsvTestChecker.check``.
    :param expected_result: Expected-value argument handed to ``CsvTestChecker.check``.
    :return: None
    """
    opml_path = os.path.join(input_files_root, "output_generator", opml_filename)
    csv_path = os.path.join(output_files_root, "output_generator", output_filename)

    unleashed = UnleashedOutline(
        Outline.from_opml(opml_path),
        default_text_tag_delimiter=['[*', '*]'],
    )
    first_entry = unleashed.extract_data_nodes()[0]

    # Make sure the expected data node was picked up before extracting from it.
    self.assertEqual('data_node_01', first_entry['data_node_name'])

    node_position = first_entry['data_node_list_index']
    root = unleashed.list_unleashed_nodes()[node_position].node()

    specifier = DataNodeSpecifier(data_node_specifier_csv_test_01)
    CsvOutputGenerator.create_csv_file(specifier.extract_data_node_dispatch(root), csv_path)

    checker = CsvTestChecker(csv_path)
    self.assertTrue(checker.check(row, col, expected_result))
def test_data_node_extract_02(self, index, section, slide, bullet):
    """Check the fields of one record extracted from the node at list index 1.

    Args:
        index: Position of the record to inspect within the extracted records.
        section: Expected value of ``section_name``.
        slide: Expected value of ``slide_name``.
        bullet: Expected value of ``bullet``.
    """
    root = self.outline_node_list[1].node()
    specifier = DataNodeSpecifier(test_data_node_specifier_ppt_01)
    record = specifier.extract_data_node_dispatch(root)[index]

    # Checked in this order so the first mismatch reported is the same as before.
    expected_by_field = (
        ('section_name', section),
        ('slide_name', slide),
        ('bullet', bullet),
    )
    for field_name, expected_value in expected_by_field:
        self.assertEqual(expected_value, record[field_name])
def setUp(self) -> None:
    """Build ``self.extracted_data_table`` for the tests in this class.

    Loads ``opml_data_extraction_test_01.opml`` from the
    ``data_node_descriptor`` input folder, takes the node at list index 1 and
    extracts it with the first entry of ``data_node_specifier_test_driver``.
    """
    specifier = DataNodeSpecifier(data_node_specifier_test_driver[0])

    opml_path = os.path.join(
        tcfg.input_files_root,
        'data_node_descriptor',
        'opml_data_extraction_test_01.opml',
    )
    unleashed = UnleashedOutline(
        Outline.from_opml(opml_path),
        default_text_tag_delimiter=('[*', '*]'),
    )
    root = unleashed.list_unleashed_nodes()[1].node()

    self.extracted_data_table = specifier.extract_data_node_dispatch(root)
def test_match_field_node(self, data_node_index, field_node_index, expected_field_name, expected_field_value):
    """Check that one field node produces exactly one (name, value) match.

    Args:
        data_node_index: Index into ``self.node_list`` of the data node whose
            sub-tree contains the field node.
        field_node_index: Index of the field node within the data node's own
            unleashed node list.
        expected_field_name: Expected first element of the single match.
        expected_field_value: Expected second element of the single match.
    """
    # The data node is the root of the sub-tree where the field nodes are located.
    sub_tree_nodes = self.node_list[data_node_index].node().list_unleashed_nodes()
    candidate = sub_tree_nodes[field_node_index]

    specifier = DataNodeSpecifier(data_node_specifier_test_driver[0])
    matches = list(specifier.match_field_node(candidate))

    self.assertEqual(1, len(matches))
    only_match = matches[0]
    self.assertIsNotNone(only_match)

    actual_name, actual_value = only_match
    self.assertEqual(expected_field_name, actual_name)
    self.assertEqual(expected_field_value, actual_value)
def test_data_node_extract_07(self, index, category, note, date):
    """Check one record, or the record count, extracted from the node at list index 73.

    Extraction is run with ``override_data_node_tag_delim=True``.

    Args:
        index: Position of the record to inspect; when ``category`` is None it
            is instead the expected number of extracted records.
        category: Expected value of ``category``, or None to request the
            record-count check.
        note: Expected value of ``item``.
        date: Expected value of ``date_due``.
    """
    root = self.outline_node_list[73].node()
    specifier = DataNodeSpecifier(test_data_node_specifier_07)
    records = specifier.extract_data_node_dispatch(
        root, override_data_node_tag_delim=True)

    if category is None:
        # Signals end of list and that the test is just to check number of records
        self.assertEqual(index, len(records), "Wrong number of records")
        return

    record = records[index]
    self.assertEqual(category, record['category'])
    self.assertEqual(note, record['item'])
    self.assertEqual(date, record['date_due'])
def test_opml_from_json(self, index, key1, key2, non_key1, non_key2, non_key3):
    """
    Data driven test to check that a data node specifier record imported from
    JSON can be used correctly to parse a data node and get correct results.

    This is not meant as a full test of data node extraction; the data is just
    complex enough to give confidence that the from_json functionality works.

    :param index: Index of the extracted record whose data is to be checked.
    :param key1: First key field expected to be in the extracted data
    :param key2: Second key field expected to be in the extracted data
    :param non_key1: Expected data for ``data_field_1``
    :param non_key2: Expected data for ``data_field_2``
    :param non_key3: Expected data for ``data_field_3``
    :return: None
    """
    specifier = DataNodeSpecifier.from_json_string(serialized_json_specifier_03x)

    # Disabled: reading the tag delimiters from the specifier header.
    # tag_text_delimiter = tuple(descriptor.dns_structure['header']['tag_delimiters']['text_delimiters'])
    # tag_note_delimiter = tuple(descriptor.dns_structure['header']['tag_delimiters']['note_delimiters'])

    # Use the specifier to process a node and check that output results are correct.
    node_position = 31
    opml_path = os.path.join(test_root, 'opml_data_extraction_test_02.opml')
    all_nodes = UnleashedOutline(Outline.from_opml(opml_path)).list_unleashed_nodes()
    root = all_nodes[node_position].node()

    record = specifier.extract_data_node_dispatch(root)[index]

    # Checked in this order so the first mismatch reported is the same as before.
    expected_by_field = (
        ('key_field_1', key1),
        ('key_field_2', key2),
        ('data_field_1', non_key1),
        ('data_field_2', non_key2),
        ('data_field_3', non_key3),
    )
    for field_name, expected_value in expected_by_field:
        self.assertEqual(expected_value, record[field_name])
def test_output_generator_csv_02(self, opml_filename, output_filename, json_dns_file, row, col, expected_result):
    """
    Creates a csv file from an outline using a data node specifier loaded
    from a JSON file and checks the written file.

    :param opml_filename: OPML input file name, looked up in the "output_generator" input folder.
    :param output_filename: csv file name, written to the "output_generator" output folder.
    :param json_dns_file: Specifier file name, looked up in the "data_node_descriptor" input folder.
    :param row: Row argument handed to ``CsvTestChecker.check``.
    :param col: Column argument handed to ``CsvTestChecker.check``.
    :param expected_result: Expected-value argument handed to ``CsvTestChecker.check``.
    :return: None
    """
    opml_path = os.path.join(input_files_root, "output_generator", opml_filename)
    csv_path = os.path.join(output_files_root, "output_generator", output_filename)
    specifier_path = os.path.join(input_files_root, "data_node_descriptor", json_dns_file)

    unleashed = UnleashedOutline(
        Outline.from_opml(opml_path),
        default_text_tag_delimiter=['[*', '*]'],
    )
    first_entry = unleashed.extract_data_nodes()[0]

    # Make sure the expected data node was picked up before extracting from it.
    self.assertEqual('data_node_01', first_entry['data_node_name'])

    node_position = first_entry['data_node_list_index']
    root = unleashed.list_unleashed_nodes()[node_position].node()

    specifier = DataNodeSpecifier.from_json_file(specifier_path)
    CsvOutputGenerator.create_csv_file(specifier.extract_data_node_dispatch(root), csv_path)

    checker = CsvTestChecker(csv_path)
    self.assertTrue(checker.check(row, col, expected_result))
def main():
    """Extract the first data node of an OPML outline and write it to a csv file.

    Expects three command line arguments, in order: the OPML file path, the
    JSON data node specifier path and the csv output path. When run with no
    arguments at all, hard coded test resource paths are used as a debugging
    aid.

    Raises:
        SystemExit: With status 1 when the number of arguments is wrong or the
            outline contains no data nodes. Without these exits the function
            would carry on with empty paths, or fail with an ``IndexError``
            when selecting the first data node.
    """
    expected_num_arguments = 3
    # sys.argv[0] is the script name, so the list is one longer than the real arguments.
    num_arguments = len(sys.argv)

    if num_arguments == 1:
        # Temporary hack to allow debugging. No parameters supplied --> use test files.
        print("Debug mode - hard coded arguments for command line")
        opml_path = "tests/test_resources/opml_data_extraction_test_01.opml"
        json_path = "tests/test_resources/custom_json_test_descriptors_risk_01.json"
        csv_path = "tests/test_resources/output_files/opml_data_extraction_test_01.csv"
    elif num_arguments != expected_num_arguments + 1:
        print(f"Wrong number of arguments ({num_arguments - 1}, should be {expected_num_arguments})")
        # Stop here: there are no usable paths to continue with.
        sys.exit(1)
    else:
        opml_path, json_path, csv_path = sys.argv[1:]

    outline = Outline.from_opml(opml_path)
    print("Successfully read outline, unleashing...")

    unleashed_outline = UnleashedOutline(outline)
    data_nodes = unleashed_outline.extract_data_nodes()
    print(f"Outline is unleashed, there are {len(data_nodes)} data nodes in this outline")
    for index, node in enumerate(data_nodes):
        print(f"{index}: {node['data_node_name']}")

    if not data_nodes:
        # Nothing to extract; indexing data_nodes[0] below would raise IndexError.
        print("No data nodes found in outline, nothing to extract")
        sys.exit(1)

    print("Processing first node")
    data_node_list_index = data_nodes[0]['data_node_list_index']
    data_node = unleashed_outline.list_unleashed_nodes()[data_node_list_index].node()

    data_node_specifier = DataNodeSpecifier.from_json_file(json_path)
    extracted_data_table = data_node_specifier.extract_data_node_dispatch(data_node)

    CsvOutputGenerator.create_csv_file(extracted_data_table, csv_path)
def test_match_node(self, data_node_index, field_node_index, expected_field_name, expected_field_value):
    """Check that a field node satisfies the ancestry criteria of its field.

    Args:
        data_node_index: Index into ``self.node_list`` of the data node whose
            sub-tree contains the field node.
        field_node_index: Index of the field node within the nodes yielded by
            the data node's ``iter_unleashed_nodes``.
        expected_field_name: Key into the first driver entry's ``descriptor``
            selecting the criteria to match against.
        expected_field_value: Not used by this test.
    """
    # The data node is the root of the sub-tree where the field nodes are located.
    sub_tree_nodes = list(self.node_list[data_node_index].node().iter_unleashed_nodes())
    candidate = sub_tree_nodes[field_node_index]

    field_entry = data_node_specifier_test_driver[0]['descriptor'][expected_field_name]
    criteria = field_entry['ancestry_matching_criteria']

    # Confirm that the field node matches with the appropriate criteria.
    self.assertTrue(DataNodeSpecifier.match_field(candidate, criteria))
def test_extract_primary_key_field_names(self):
    """Check the names returned by ``extract_field_names(primary_key_only=True)``.

    Uses the first entry of ``data_node_specifier_test_driver`` as the specifier.
    """
    wanted = ['risk_description']
    specifier = DataNodeSpecifier(data_node_specifier_test_driver[0])
    found = specifier.extract_field_names(primary_key_only=True)
    self.assertEqual(wanted, found)
def test_opml_to_json(self):
    """Check that ``DataNodeSpecifier.to_json`` reproduces ``serialized_json_01``.

    The input is ``test_json_descriptor_01``.
    """
    # Lift the diff size limit so a mismatch is reported in full.
    self.maxDiff = None
    actual_json = DataNodeSpecifier.to_json(test_json_descriptor_01)
    self.assertEqual(serialized_json_01, actual_json)