def transform_xml(xml_asset_path, identifier):
    """Modify the article XML file found at xml_asset_path.

    The file is read twice, once with parse.parse_article_xml and once with
    parser.parse_document. Both results, along with identifier, are given to
    transform_xml_history_tags, and the value it returns is handed to
    write_xml_file together with the same path and identifier.
    """
    # history tags are removed from the XML for certain article types
    xml_root = parse.parse_article_xml(xml_asset_path)
    article_soup = parser.parse_document(xml_asset_path)
    xml_root = transform_xml_history_tags(xml_root, article_soup, identifier)
    write_xml_file(xml_root, xml_asset_path, identifier)
def test_parse_article_xml_entities(self):
    """Check that entities in the article text survive parse_article_xml.

    A small document containing an em dash, the XML special characters and a
    Greek beta is written to disk, parsed, and the serialised root is
    compared against the expected bytes.
    """
    xml_file_path = os.path.join(self.temp_dir, "test.xml")
    # NOTE(review): the entity spellings in this literal were reconstructed;
    # the previous text had them decoded to raw characters, which left an
    # unescaped double quote inside the string (a syntax error). The named
    # forms &mdash; and &beta; presume parse_article_xml resolves HTML named
    # entities - confirm against version control, or use &#8212; / &#946;.
    with open(xml_file_path, "w") as open_file:
        open_file.write("<article>&mdash;&lt;&gt;&amp;&quot;&beta;</article>")
    # Assuming ElementTree is xml.etree.ElementTree: tostring() defaults to
    # us-ascii output, so non-ASCII text is emitted as numeric character
    # references, and a double quote in element text is left unescaped.
    expected = b'<article>&#8212;&lt;&gt;&amp;"&#946;</article>'
    root = parse.parse_article_xml(xml_file_path)
    self.assertIsNotNone(root)
    self.assertEqual(ElementTree.tostring(root), expected)
def test_file_list(self):
    """Compare parse.file_list output for the 45644 zip with its fixture."""
    source_zip = "tests/test_data/30-01-2019-RA-eLife-45644.zip"
    unzipped_assets = zip_lib.unzip_zip(source_zip, self.temp_dir)
    # index 1 of the XML asset is the value given to parse_article_xml
    xml_asset_path = parse.article_xml_asset(unzipped_assets)[1]
    article_root = parse.parse_article_xml(xml_asset_path)
    expected_files = read_fixture("file_list_45644.py")
    actual_files = parse.file_list(article_root)
    self.assertEqual(actual_files, expected_files)
def transform_code_files(asset_file_name_map, output_dir, identifier):
    """Zip code files if they are not already a zip file.

    The article XML located through the asset map is parsed to work out the
    file transformations, the files are zipped by code_file_zip, and the
    XML file tags are rewritten by xml_rewrite_file_tags.

    Returns the new asset map built by transform_asset_file_name_map from
    the original map and the file transformations.
    """
    # locate and parse the article XML
    xml_asset_path = parse.article_xml_asset(asset_file_name_map)[1]
    xml_root = parse.parse_article_xml(xml_asset_path)

    transformations = code_file_transformations(
        xml_root, asset_file_name_map, output_dir, identifier
    )
    code_file_zip(transformations, output_dir, identifier)

    # the new asset map is created before the XML tags are rewritten
    updated_map = transform_asset_file_name_map(asset_file_name_map, transformations)
    xml_rewrite_file_tags(xml_asset_path, transformations, identifier)
    return updated_map
def glencoe_xml(xml_file_path, video_data, pretty=True, indent=""):
    """Generate XML to be submitted to Glencoe.

    An Article object is built from the XML file, journal details are read
    from the parsed XML tree, and both are passed to generate_xml together
    with video_data, pretty and indent. The value generate_xml returns is
    returned unchanged.
    """
    # only the Article object is used; the error count is discarded
    article, _error_count = article_from_xml(xml_file_path)

    # journal data comes from the XML elementtree
    xml_root = parse.parse_article_xml(xml_file_path)
    kept_journal_ids = {}
    for id_type, id_value in parse.xml_journal_id_values(xml_root).items():
        # keep only the journal id types listed in JOURNAL_ID_TYPES
        if id_type in JOURNAL_ID_TYPES:
            kept_journal_ids[id_type] = id_value

    journal_data = {
        "journal_ids": kept_journal_ids,
        "journal_title": parse.xml_journal_title(xml_root),
        "publisher_name": parse.xml_publisher_name(xml_root),
    }
    return generate_xml(article, journal_data, video_data, pretty, indent)
def xml_rewrite_file_tags(xml_asset_path, file_transformations, identifier):
    """Rewrite the XML tags of the file at xml_asset_path.

    The file is parsed, the root is passed through transform_xml_file_tags
    with file_transformations, and the result is given to write_xml_file
    with the same path and identifier.
    """
    xml_root = parse.parse_article_xml(xml_asset_path)
    # the log line is emitted only once the XML has been parsed
    LOGGER.info("%s rewriting xml tags", identifier)
    rewritten_root = transform_xml_file_tags(xml_root, file_transformations)
    write_xml_file(rewritten_root, xml_asset_path, identifier)
def test_parse_article_xml_failure(self):
    """Parsing a file that is not XML raises ElementTree.ParseError."""
    bad_xml_path = os.path.join(self.temp_dir, "test.xml")
    with open(bad_xml_path, "w") as bad_xml_file:
        bad_xml_file.write("malformed xml")
    # callable form of assertRaises: the parse call is made by the assertion
    self.assertRaises(
        ElementTree.ParseError, parse.parse_article_xml, bad_xml_path
    )
def test_parse_article_xml(self):
    """Parsing a minimal article document gives a result that is not None."""
    minimal_xml_path = os.path.join(self.temp_dir, "test.xml")
    with open(minimal_xml_path, "w") as minimal_xml_file:
        # an empty article element is the whole document
        minimal_xml_file.write("<article/>")
    parsed_root = parse.parse_article_xml(minimal_xml_path)
    self.assertIsNotNone(parsed_root)