def test_read_invalid_xml_01(self, filename, expected_error_msg):
    """
    Check that Outline.from_opml raises ParseError with the expected message.

    :param filename: Name of the OPML file, located under this test class's
        local_path folder inside the input files root.
    :param expected_error_msg: Text handed to test_string_segment together
        with the msg attribute of the raised ParseError.
    :return:
    """
    opml_path = os.path.join(input_files_root, self.local_path, filename)
    try:
        Outline.from_opml(opml_path)
    except ParseError as parse_err:
        # Expected failure mode: check the message carried by the error.
        message_matches = test_string_segment(expected_error_msg, parse_err.msg)
        self.assertTrue(message_matches)
    except Exception as other_err:
        # Any other exception type is reported as a test failure.
        self.fail(f"Unexpected exception raised {other_err}")
    else:
        # Loading succeeded, which is itself a failure for this test.
        self.fail(f'Exception expected but wasn\'t raised. Msg {expected_error_msg}')
def test_opml_creation_nodes(self, opml_file_name, node_sequence_number, exp_text_value, exp_note_value):
    """
    Round-trip an OPML file (read, write, read again) and check one node.

    :param opml_file_name: Name of the input OPML file under relative_folder.
    :param node_sequence_number: Index into the re-read outline's node list.
    :param exp_text_value: Expected text of the selected node.
    :param exp_note_value: Expected note of the selected node.
    :return:
    """
    source_path = os.path.join(input_files_root, relative_folder, opml_file_name)
    round_trip_path = os.path.join(output_files_root, relative_folder, test_opml_output_file_01)

    # Read the original, write it back out, then load the written copy.
    Outline.from_opml(source_path).write_opml(round_trip_path)
    reloaded_outline = Outline.from_opml(round_trip_path)

    node_under_test = reloaded_outline.list_nodes()[node_sequence_number].node()
    self.assertEqual(exp_text_value, node_under_test.text)
    self.assertEqual(exp_note_value, node_under_test.note)
def test_output_generator(self, filename, record_num, record_name, expected_level, expected_text):
    """
    Generate a PowerPoint file from the first data node of an outline and
    check one record read back from the generated file.

    :param filename: OPML file name under the test folder in the input root.
    :param record_num: Index of the record (from get_slide_data) to check.
    :param record_name: Label included in the failure message for the level check.
    :param expected_level: Expected first element of the selected record.
    :param expected_text: Expected second element of the selected record.
    :return:
    """
    opml_path = os.path.join(input_files_root, test_file_folder_relative, filename)
    template_path = os.path.join(input_files_root, test_file_folder_relative, "ppt_template_02.pptx")
    generated_ppt_path = os.path.join(output_files_root, test_file_folder_relative, "ppt_output_02.pptx")

    unleashed = UnleashedOutline(
        Outline.from_opml(opml_path),
        default_text_tag_delimiter=['', ':'],
    )

    # Locate the first data node reported by the outline and fetch the node itself.
    first_data_node_index = unleashed.extract_data_nodes()[0]['data_node_list_index']
    data_node = unleashed.list_unleashed_nodes()[first_data_node_index].node()

    data_table = DataNodeSpecifier(dns).extract_data_node_dispatch(data_node)
    PptOutputGeneratorSimple.generate_ppt(data_table, generated_ppt_path, template_path)

    # Read the generated file back and compare the requested record.
    slide_records = list(get_slide_data(generated_ppt_path))
    actual_level, actual_text = slide_records[record_num]
    self.assertEqual(expected_level, actual_level, f"Failed on {record_name}")
    self.assertEqual(expected_text, actual_text)
def __init__(self, outline_path, default_tag_delimiters):
    """
    Load the outline held in an OPML file and keep it on the instance.

    :param outline_path: Full path to opml file for outline.
    :param default_tag_delimiters: Tag delimiter to use if one isn't supplied
        for a given descriptor.
        NOTE(review): this argument is not referenced in the constructor
        body - confirm whether it is meant to be stored.
    """
    loaded_outline = Outline.from_opml(outline_path)
    self.outline = loaded_outline
def test_opml_creation_header_fields(self, opml_file_name, field_name, expected_field_value):
    """
    Round-trip an OPML file (read, write, read again) and check that a named
    attribute of the re-read outline has the expected value.

    :param opml_file_name: Name of the input OPML file under relative_folder.
    :param field_name: Attribute name looked up on the re-read outline.
    :param expected_field_value: Value the attribute is expected to hold.
    :return:
    """
    source_path = os.path.join(input_files_root, relative_folder, opml_file_name)
    round_trip_path = os.path.join(output_files_root, relative_folder, test_opml_output_file_01)

    # Write the freshly loaded outline straight back out, then reload it.
    Outline.from_opml(source_path).write_opml(round_trip_path)
    reloaded_outline = Outline.from_opml(round_trip_path)

    self.assertEqual(expected_field_value, getattr(reloaded_outline, field_name))
def setUp(self) -> None:
    """Load the test OPML file, unleash it and cache its node list."""
    opml_path = os.path.join(self.test_root, 'opml_data_extraction_test_02.opml')
    outline = Outline.from_opml(opml_path)
    self.unleashed_outline = UnleashedOutline(outline)
    self.outline_node_list = self.unleashed_outline.list_unleashed_nodes()
def test_json_desc_version_number(self):
    """
    Parse an outline with a JSON specifier that maps heading levels directly
    to outline levels (Outline Level 1 is Heading Level 1 etc.).

    The JSON does not specify every matching criterion; unspecified ones
    should be assigned None in the JSON translation. The test checks the
    number of rows extracted from the chosen root node.

    :return:
    """
    expected_number_of_rows = 32
    root_node_index = 1
    specifier_path = os.path.join(
        self.test_root, "custom_json_test_descriptors_generic_levels.json")
    opml_path = os.path.join(self.test_root, "custom_json_test_descriptors.opml")

    # Read json file into a string (later this will be done within the outline engine).
    with open(specifier_path, 'r') as specifier_file:
        specifier_json = specifier_file.read()

    # Read opml file into an outline and pick the node to extract from.
    unleashed = UnleashedOutline(Outline.from_opml(opml_path))
    all_nodes = list(unleashed.iter_unleashed_nodes())
    root_node = all_nodes[root_node_index].node()

    specifier = DataNodeSpecifier.from_json_string(specifier_json)
    extracted_rows = specifier.extract_data_node_dispatch(root_node)

    self.assertEqual(expected_number_of_rows, len(extracted_rows))
def test_iter_nodes(self):
    """
    Smoke test: iterate over every node of the outline and print it.

    There is no real assertion here; the test only fails if loading or
    iterating raises.
    """
    outline = Outline.from_opml(opml_path=self.input_file_full_path, full_validate=True)
    for position, node_record in enumerate(outline.iter_nodes()):
        print(f'Node {position}: {node_record.node()}')
    # Reaching this point means iteration completed without an exception.
    self.assertTrue(True)
def test_identify_data_node_01(self, file_path, expected_node_index, field_name, expected_field_value):
    """
    Check one field of one data node record extracted from an outline.

    :param file_path: Path of the OPML file to load.
    :param expected_node_index: Index into the list returned by extract_data_nodes.
    :param field_name: Key looked up in the selected data node record.
    :param expected_field_value: Value expected under that key.
    :return:
    """
    unleashed = UnleashedOutline(Outline.from_opml(file_path))
    node_record = unleashed.extract_data_nodes()[expected_node_index]
    actual_field_value = node_record[field_name]
    self.assertEqual(expected_field_value, actual_field_value)
def setUp(self) -> None:
    """Build the unleashed outline used by the tests, with explicit text and note tag delimiters."""
    opml_path = os.path.join(
        tcfg.input_files_root, 'outline', 'opml', 'opml-test-valid-opml-01.opml')
    self.unleashed_outline = UnleashedOutline(
        Outline.from_opml(opml_path),
        default_text_tag_delimiter=text_tag_regex,
        default_note_tag_delimiter=note_tag_regex,
    )
def setUp(self) -> None:
    """Load the data extraction test outline and cache all of its unleashed nodes."""
    text_delimiters = ('[*', '*]')
    opml_path = os.path.join(
        tcfg.input_files_root, 'data_node_descriptor', 'opml_data_extraction_test_01.opml')

    # The same delimiter pair is handed to both the loader and the unleashed wrapper.
    outline = Outline.from_opml(opml_path, text_delimiters)
    unleashed = UnleashedOutline(outline, default_text_tag_delimiter=text_delimiters)

    # Create list of all nodes (plus ancestry) to allow access to nodes by index.
    self.node_list = list(unleashed.iter_unleashed_nodes())
def test_head_data_from_opml(self, field_name, expected_value):
    """
    Check that a named attribute of the loaded outline has the expected value.

    Args:
        field_name: Attribute name looked up on the outline.
        expected_value: Value the attribute is expected to hold.
    """
    opml_path = os.path.join(tcfg.input_files_root, self.local_path, self.test_data_file)
    outline = Outline.from_opml(opml_path)

    # Access field by attribute name to allow data driven approach for test
    self.assertEqual(expected_value, getattr(outline, field_name))
def test_outline_create_01(self, node_sequence_number, parent_node_number, level, text, note):
    """
    Regenerate the test outline, load it, and check one node's depth, text and note.

    :param node_sequence_number: Node number passed to Outline.get_node.
    :param parent_node_number: Not used by this test.
    :param level: Expected depth of the node's ancestry record.
    :param text: Expected node text.
    :param note: Expected node note.
    :return:
    """
    generate_outline_from_test_data()
    outline = Outline.from_opml(test_opml_path)

    ancestry_record = outline.get_node(node_sequence_number)
    # The last item of the ancestry record holds the node being checked.
    node_under_test = ancestry_record[-1].node

    self.assertEqual(level, ancestry_record.depth)
    self.assertEqual(text, node_under_test.text)
    self.assertEqual(note, node_under_test.note)
def test_unleashed_outline(self, node_number, text, note, text_tag, note_tag):
    """
    Check text, note and both tags of one node from an unleashed outline.

    :param node_number: Node number passed to UnleashedOutline.get_node.
    :param text: Expected node text.
    :param note: Expected node note.
    :param text_tag: Expected text tag of the node.
    :param note_tag: Expected note tag of the node.
    :return:
    """
    # Two separate lists, exactly as two separate literals would be.
    text_delimiters = ['(-', '-)']
    note_delimiters = ['(-', '-)']
    unleashed = UnleashedOutline(
        Outline.from_opml(input_file_path), text_delimiters, note_delimiters)

    # The last item of the ancestry record holds the node being checked.
    node = unleashed.get_node(node_number)[-1].node

    self.assertEqual(text, node.text)
    self.assertEqual(note, node.note)
    self.assertEqual(text_tag, node.text_tag)
    self.assertEqual(note_tag, node.note_tag)
def test_child_access(self):
    """
    Check that the top-level node exposes its three children consistently
    through len(), list() and direct iteration.
    """
    top_level_node = Outline.from_opml(self.test_outline).top_outline_node

    self.assertEqual(3, len(top_level_node))

    children_as_list = list(top_level_node)
    self.assertEqual(3, len(children_as_list))

    # Iterating directly must give the same OutlineNode objects as the list.
    for position, iterated_child in enumerate(top_level_node):
        self.assertIsInstance(iterated_child, OutlineNode)
        listed_child = children_as_list[position]
        self.assertIsInstance(listed_child, OutlineNode)
        self.assertEqual(iterated_child, listed_child)
def test_unleashed_outline_node(self):
    """
    Smoke test: read depth and child_number from an unleashed node's ancestry record.

    There are no assertions; the test only fails if one of the accesses raises.
    """
    opml_path = os.path.join(
        tcfg.input_files_root, 'data_node_descriptor', 'opml_data_extraction_test_02.opml')
    unleashed = UnleashedOutline(Outline.from_opml(opml_path))
    all_nodes = list(unleashed.iter_unleashed_nodes())

    ancestry_record = all_nodes[1]
    # Values are read only to exercise the accessors; nothing is checked.
    depth = ancestry_record.depth
    ancestry_item = ancestry_record[1]
    child_number = ancestry_item.child_number
def test_opml_from_json(self, index, key1, key2, non_key1, non_key2, non_key3):
    """
    Data driven check that a data node specifier imported from JSON can be
    used to parse a data node and produce the correct records.

    This is not meant as a full test of data node extraction; it uses data
    complex enough to give confidence that from_json_string works.

    :param index: Index of the extracted record to check.
    :param key1: Expected value of 'key_field_1' in that record.
    :param key2: Expected value of 'key_field_2' in that record.
    :param non_key1: Expected value of 'data_field_1'.
    :param non_key2: Expected value of 'data_field_2'.
    :param non_key3: Expected value of 'data_field_3'.
    :return:
    """
    data_node_index = 31
    descriptor = DataNodeSpecifier.from_json_string(serialized_json_specifier_03x)

    # NOTE: no explicit tag delimiters are passed to UnleashedOutline here; any
    # delimiters held in the descriptor's header are not applied by this test.
    opml_path = os.path.join(test_root, 'opml_data_extraction_test_02.opml')
    unleashed = UnleashedOutline(Outline.from_opml(opml_path))
    data_node = unleashed.list_unleashed_nodes()[data_node_index].node()

    # Use descriptor to process a node and check that output results are correct.
    record = descriptor.extract_data_node_dispatch(data_node)[index]

    self.assertEqual(key1, record['key_field_1'])
    self.assertEqual(key2, record['key_field_2'])
    self.assertEqual(non_key1, record['data_field_1'])
    self.assertEqual(non_key2, record['data_field_2'])
    self.assertEqual(non_key3, record['data_field_3'])
def test_output_generator_csv_02(self, opml_filename, output_filename, json_dns_file, row, col, expected_result):
    """
    Create a csv file from an outline using a JSON data node specifier file,
    then check one cell of the result.

    :param opml_filename: OPML file name under the "output_generator" input folder.
    :param output_filename: CSV file name under the "output_generator" output folder.
    :param json_dns_file: JSON specifier file name under the "data_node_descriptor" input folder.
    :param row: Row passed to CsvTestChecker.check.
    :param col: Column passed to CsvTestChecker.check.
    :param expected_result: Expected value passed to CsvTestChecker.check.
    :return:
    """
    opml_path = os.path.join(input_files_root, "output_generator", opml_filename)
    csv_path = os.path.join(output_files_root, "output_generator", output_filename)
    specifier_path = os.path.join(input_files_root, "data_node_descriptor", json_dns_file)

    unleashed = UnleashedOutline(
        Outline.from_opml(opml_path),
        default_text_tag_delimiter=['[*', '*]'],
    )

    # The first data node found must be the one the fixtures are built around.
    first_data_node = unleashed.extract_data_nodes()[0]
    self.assertEqual('data_node_01', first_data_node['data_node_name'])

    node_index = first_data_node['data_node_list_index']
    data_node = unleashed.list_unleashed_nodes()[node_index].node()

    specifier = DataNodeSpecifier.from_json_file(specifier_path)
    data_table = specifier.extract_data_node_dispatch(data_node)
    CsvOutputGenerator.create_csv_file(data_table, csv_path)

    checker = CsvTestChecker(csv_path)
    self.assertTrue(checker.check(row, col, expected_result))
def test_output_generator_csv_01(self, opml_filename, output_filename, ignored_json_file, row, col, expected_result):
    """
    Create a csv file from an outline using the in memory data node
    specifier, then check one cell of the result.

    NOTE: A JSON file name is passed in because the same data generator
    drives both csv test methods; this method ignores it.

    :param opml_filename: OPML file name under the "output_generator" input folder.
    :param output_filename: CSV file name under the "output_generator" output folder.
    :param ignored_json_file: Not used by this test.
    :param row: Row passed to CsvTestChecker.check.
    :param col: Column passed to CsvTestChecker.check.
    :param expected_result: Expected value passed to CsvTestChecker.check.
    :return:
    """
    opml_path = os.path.join(input_files_root, "output_generator", opml_filename)
    csv_path = os.path.join(output_files_root, "output_generator", output_filename)

    unleashed = UnleashedOutline(
        Outline.from_opml(opml_path),
        default_text_tag_delimiter=['[*', '*]'],
    )

    # The first data node found must be the one the fixtures are built around.
    first_data_node = unleashed.extract_data_nodes()[0]
    self.assertEqual('data_node_01', first_data_node['data_node_name'])

    node_index = first_data_node['data_node_list_index']
    data_node = unleashed.list_unleashed_nodes()[node_index].node()

    specifier = DataNodeSpecifier(data_node_specifier_csv_test_01)
    data_table = specifier.extract_data_node_dispatch(data_node)
    CsvOutputGenerator.create_csv_file(data_table, csv_path)

    checker = CsvTestChecker(csv_path)
    self.assertTrue(checker.check(row, col, expected_result))
def test_get_node_01(self, file_name, node_to_get, item_to_test, expected_value):
    """
    Fetch one node from an outline via get_node and check a single property of it.

    :param file_name: OPML file name under relative_folder in the input root.
    :param node_to_get: Node number passed to Outline.get_node.
    :param item_to_test: Which property to check; one of "level",
        "child_number", "text" or "note". Any other value fails the test.
    :param expected_value: Expected value of the selected property.
    :return:
    """
    input_file_path = os.path.join(input_files_root, relative_folder, file_name)
    outline = Outline.from_opml(input_file_path)

    ancestry_record_to_test: NodeAncestryRecord = outline.get_node(node_to_get)
    # The last item of the ancestry record describes the requested node itself.
    ancestry_item_to_test: NodeAncestryItem = ancestry_record_to_test[-1]
    node_to_test: OutlineNode = ancestry_item_to_test.node

    if item_to_test == "level":
        self.assertEqual(expected_value, ancestry_record_to_test.depth)
    elif item_to_test == "child_number":
        self.assertEqual(expected_value, ancestry_item_to_test.child_number)
    elif item_to_test == "text":
        self.assertEqual(expected_value, node_to_test.text)
    elif item_to_test == "note":
        self.assertEqual(expected_value, node_to_test.note)
    else:
        # A bad selector in the test data should fail loudly, not pass silently.
        self.fail(f"Unrecognised item to test {item_to_test}")
def main():
    """
    Command line entry point: extract the first data node of an OPML outline
    into a CSV file.

    Expects three command line arguments, in order: the OPML file path, the
    JSON data node specifier path and the output CSV path. When run with no
    arguments, hard coded test file paths are used (debugging aid).

    Exits with status 1 (SystemExit) if the wrong number of arguments is
    supplied or the outline contains no data nodes.
    """
    expected_num_arguments = 3
    # sys.argv[0] is the script name, so it is excluded from the count.
    num_supplied = len(sys.argv) - 1

    if num_supplied == 0:
        # Temporary hack to allow debugging. No parameters supplied --> use test files.
        print("Debug mode - hard coded arguments for command line")
        opml_path = "tests/test_resources/opml_data_extraction_test_01.opml"
        json_path = "tests/test_resources/custom_json_test_descriptors_risk_01.json"
        csv_path = "tests/test_resources/output_files/opml_data_extraction_test_01.csv"
    elif num_supplied != expected_num_arguments:
        # Stop here: continuing would try to load an outline from an empty path.
        print(f"Wrong number of arguments ({num_supplied}, should be {expected_num_arguments})")
        sys.exit(1)
    else:
        opml_path = sys.argv[1]
        json_path = sys.argv[2]
        csv_path = sys.argv[3]

    outline = Outline.from_opml(opml_path)
    print("Successfully read outline, unleashing...")

    unleashed_outline = UnleashedOutline(outline)
    data_nodes = unleashed_outline.extract_data_nodes()
    num_data_nodes = len(data_nodes)
    print(f"Outline is unleashed, there are {num_data_nodes} data nodes in this outline")

    for index, node in enumerate(data_nodes):
        print(f"{index}: {node['data_node_name']}")

    if num_data_nodes == 0:
        # Nothing to extract; avoid an IndexError on data_nodes[0] below.
        print("No data nodes found - nothing to process")
        sys.exit(1)

    print("Processing first node")
    data_node_list_index = data_nodes[0]['data_node_list_index']
    data_node = unleashed_outline.list_unleashed_nodes()[data_node_list_index].node()

    data_node_specifier = DataNodeSpecifier.from_json_file(json_path)
    extracted_data_table = data_node_specifier.extract_data_node_dispatch(data_node)

    CsvOutputGenerator.create_csv_file(extracted_data_table, csv_path)
def test_field_access(self):
    """
    Check text/note access on child nodes reached by indexing from the
    top-level OutlineNode, and that two index paths to the same node agree.
    """
    # Access the top level OutlineNode object
    top_level_node = Outline.from_opml(self.test_outline).top_outline_node

    # Check that accessing child node gets the right one
    first_child = top_level_node[0]
    self.assertEqual('H1:Heading A', first_child.text)
    self.assertEqual('Notes for Heading A', first_child.note)

    # Check that accessing sub-nodes from top level works ok.
    # Note that (unleashed) tags are in the text but (correctly) not recognised by outline node.
    # Also note that white space is NOT ignored in the tag text as it isn't recognised as a tag.
    first_grandchild = top_level_node[0][0]
    self.assertEqual(' (-TAG-TEXT-H2B-)H2: Heading B', first_grandchild.text)
    self.assertEqual('Notes for Heading C', first_grandchild.note)

    # Check that two ways of getting to the same node reveal the same one.
    via_grandchild = first_grandchild[0]
    via_top_level = top_level_node[0][0][0]
    self.assertEqual(via_grandchild, via_top_level)
def setUp(self) -> None:
    """Extract the data table from the test outline's data node and keep it for the tests."""
    data_node_index = 1
    specifier = DataNodeSpecifier(data_node_specifier_test_driver[0])

    text_delimiters = ('[*', '*]')
    opml_path = os.path.join(
        tcfg.input_files_root, 'data_node_descriptor', 'opml_data_extraction_test_01.opml')
    unleashed = UnleashedOutline(
        Outline.from_opml(opml_path),
        default_text_tag_delimiter=text_delimiters,
    )

    data_node = unleashed.list_unleashed_nodes()[data_node_index].node()
    self.extracted_data_table = specifier.extract_data_node_dispatch(data_node)
def test_output_generator_ppt_01(self, filename, record_num, record_name, expected_level, expected_text):
    """
    Create a PowerPoint skeleton from the first data node of an outline and
    check one record read back from the generated file.

    :param filename: OPML file name under the test folder in the input root.
    :param record_num: Index of the record (from get_slide_data) to check.
    :param record_name: Label included in the failure message for the level check.
    :param expected_level: Expected first element of the selected record.
    :param expected_text: Expected second element of the selected record.
    :return:
    """
    opml_path = os.path.join(tcfg.input_files_root, test_file_folder_relative, filename)
    template_path = os.path.join(
        tcfg.input_files_root, test_file_folder_relative, "ppt_template_01.pptx")
    generated_ppt_path = os.path.join(
        tcfg.output_files_root, test_file_folder_relative, "ppt_output_01.pptx")

    unleashed = UnleashedOutline(
        Outline.from_opml(opml_path),
        default_text_tag_delimiter=['', ':'],
    )

    # The first data node found must be the one the fixtures are built around.
    first_data_node = unleashed.extract_data_nodes()[0]
    self.assertEqual('data_node_01', first_data_node['data_node_name'])

    node_index = first_data_node['data_node_list_index']
    data_node = unleashed.list_unleashed_nodes()[node_index].node()

    ppt_generator = PowerPointGenerator()
    ppt_generator.create_power_point_skeleton(data_node, template_path, generated_ppt_path)

    # Read the generated file back and compare the requested record.
    slide_records = list(get_slide_data(generated_ppt_path))
    actual_level, actual_text = slide_records[record_num]
    self.assertEqual(expected_level, actual_level, f"Failed on {record_name}")
    self.assertEqual(expected_text, actual_text)
def setUp(self) -> None:
    """Load the outline node test OPML file into self.outline."""
    opml_path = os.path.join(
        tcfg.input_files_root, 'outline', 'outline_node', 'outline-test-valid-01.opml')
    self.outline = Outline.from_opml(opml_path)
def test_valid_opml_01(self, filename):
    """
    Check that Outline.from_opml loads the given file without raising.

    :param filename: OPML file name under this test class's local_path folder.
    :return:
    """
    opml_path = os.path.join(input_files_root, self.local_path, filename)
    try:
        Outline.from_opml(opml_path)
    except Exception as load_err:
        # Report any exception as a test failure rather than a test error.
        self.fail(f"Unexpected exception raised {load_err}")
def setUp(self) -> None:
    """Load the valid OPML test file from the resources folder into self.outline."""
    opml_path = os.path.join(
        self.folder_from_resources_root, 'opml-test-valid-opml-01.opml')
    self.outline = Outline.from_opml(opml_path)