Exemple #1
0
def pack_article_xml(file_xml_path):
    """Pack an article XML together with its assets and renditions.

    The XML at ``file_xml_path`` is loaded and wrapped in an ``SPS_Package``.
    Its asset names are replaced, its renditions are collected, and both
    lists are handed to ``packing_assets``. A ``manifest.json`` holding the
    renditions metadata and the XML itself are then written into the
    directory returned by ``packing_assets``.

    Args:
        file_xml_path: Path to the article XML file.

    Returns:
        None.
    """
    # Only the base name is used; the extension is discarded.
    original_filename, _ = files.extract_filename_ext_by_path(file_xml_path)
    obj_xml = xml.file2objXML(file_xml_path)
    sps_package = SPS_Package(obj_xml, original_filename)

    complete_root = config.get("SPS_PKG_PATH")
    incomplete_root = config.get("INCOMPLETE_SPS_PKG_PATH")
    pkg_path = os.path.join(complete_root, original_filename)
    bad_pkg_path = os.path.join(incomplete_root, original_filename)

    # Passing through a set drops duplicated replacement entries.
    unique_assets = list(set(sps_package.replace_assets_names()))
    logger.info(
        "%s possui %s ativos digitais", file_xml_path, len(unique_assets)
    )

    renditions, renditions_metadata = sps_package.get_renditions_metadata()
    logger.info("%s possui %s renditions", file_xml_path, len(renditions))

    package_path = packing_assets(
        unique_assets + renditions,
        pkg_path,
        bad_pkg_path,
        sps_package.package_name,
    )

    manifest_path = os.path.join(package_path, "manifest.json")
    files.write_file(manifest_path, json.dumps(renditions_metadata))

    xml_path = os.path.join(package_path, "%s.xml" % sps_package.package_name)
    xml.objXML2file(xml_path, obj_xml)
Exemple #2
0
def pack_article_xml(file_xml_path):
    """Pack an article XML together with its digital assets.

    The XML at ``file_xml_path`` is loaded and wrapped in an ``SPS_Package``.
    The destination directory is emptied via ``files.make_empty_dir``, the
    asset names are replaced, and the assets are handed to
    ``packing_assets``. The XML is finally written into the directory that
    ``packing_assets`` returns.

    Args:
        file_xml_path: Path to the article XML file.

    Returns:
        None.
    """
    # Only the base name is used; the extension is discarded.
    original_filename, _ = files.extract_filename_ext_by_path(file_xml_path)
    obj_xml = xml.file2objXML(file_xml_path)
    sps_package = SPS_Package(obj_xml, original_filename)

    complete_root = config.get("SPS_PKG_PATH")
    incomplete_root = config.get("INCOMPLETE_SPS_PKG_PATH")
    pkg_path = os.path.join(complete_root, original_filename)
    bad_pkg_path = os.path.join(incomplete_root, original_filename)

    files.make_empty_dir(pkg_path)

    # Passing through a set drops duplicated replacement entries.
    unique_assets = list(set(sps_package.replace_assets_names()))
    logger.info(
        "%s possui %s ativos digitais", file_xml_path, len(unique_assets)
    )

    package_path = packing_assets(
        unique_assets,
        pkg_path,
        bad_pkg_path,
        sps_package.package_name,
    )

    xml_path = os.path.join(package_path, "%s.xml" % sps_package.package_name)
    xml.objXML2file(xml_path, obj_xml)
Exemple #3
0
class Test_SPS_Package(unittest.TestCase):
    """Check ``SPS_Package`` against a small XML with seven xlink:href nodes."""

    def setUp(self):
        """Build an ``SPS_Package`` named "a01" from an in-memory XML tree."""
        article_xml = """<root xmlns:xlink="http://www.w3.org/1999/xlink">
                <inline-graphic xlink:href="a01tab01.gif"/>
                <graphic xlink:href="a01f01.gif"/>
                <ext-link xlink:href="a01tab02.gif"/>
                <ext-link xlink:href="mailto:a01f02.gif"/>
                <inline-supplementary-material xlink:href="a01tab03.gif"/>
                <supplementary-material xlink:href="a01tab04.gif"/>
                <media xlink:href="a01tab04.gif"/>
            </root>
            """
        xml_tree = etree.fromstring(article_xml)
        self.sps_package = SPS_Package(xml_tree, "a01")

    def test_elements_which_has_xlink_href(self):
        """Seven href-bearing nodes are found, with tags in sorted order."""
        found = list(self.sps_package.elements_which_has_xlink_href)

        # The expectation is the alphabetically sorted tag list, not the
        # document order of the fixture.
        expected_tags = [
            "inline-graphic",
            "graphic",
            "ext-link",
            "ext-link",
            "inline-supplementary-material",
            "supplementary-material",
            "media",
        ]
        expected_tags.sort()

        self.assertEqual(len(found), 7)
        found_tags = [node.tag for node in found]
        self.assertEqual(found_tags, expected_tags)

    def test_replace_assets(self):
        """Six (old name, new name) pairs are produced, in the listed order."""
        # The "mailto:" ext-link from the fixture has no entry here.
        expected = [
            ("a01tab02.gif", "a01-gtab02"),
            ("a01f01.gif", "a01-gf01"),
            ("a01tab01.gif", "a01-gtab01"),
            ("a01tab03.gif", "a01-gtab03"),
            ("a01tab04.gif", "a01-gtab04"),
            ("a01tab04.gif", "a01-gtab04"),
        ]
        replacements = self.sps_package.replace_assets_names()
        self.assertEqual(len(replacements), 6)
        for position, (wanted, got) in enumerate(zip(expected, replacements)):
            with self.subTest(position):
                self.assertEqual(wanted[0], got[0])
                self.assertEqual(wanted[1], got[1])
Exemple #4
0
def pack_article_xml(file_xml_path, poison_pill=PoisonPill()):
    """Pack an XML file and its digital assets.

    The XML is loaded and wrapped in an ``SPS_Package``, two fixes are
    applied to it (the "other" article id and, when a replacement is
    available in ``ISSNs``, the ISSNs). Assets and renditions are then handed
    to ``packing_assets``, and ``manifest.json`` plus the XML are written
    into the directory it returns.

    Args:
        file_xml_path: Path to the XML file.
        poison_pill: Object exposing a ``poisoned`` attribute; when it is
            truthy the function returns immediately without doing any work.
            The default instance is created once, when the function is
            defined, and is shared by every call that omits the argument.

    Returns:
        None. The XML is persisted in the directory returned by
        ``packing_assets``.

    Example:
        packing.pack_article_xml(
                os.path.join("S0044-59672003000300002.xml")
            )

    Raises:
        Nothing is caught here: any exception raised by the helpers this
        function calls propagates to the caller.
    """
    if poison_pill.poisoned:
        return

    # Only the base name is used; the extension is discarded.
    original_filename, _ = files.extract_filename_ext_by_path(file_xml_path)

    obj_xml = xml.file2objXML(file_xml_path)

    sps_package = SPS_Package(obj_xml, original_filename)
    sps_package.fix("article_id_which_id_type_is_other",
                    sps_package.scielo_pid_v2
                    and sps_package.scielo_pid_v2[-5:],
                    silently=True)

    # The PID may be missing (the call above already guards against that),
    # so only slice it for the ISSN lookup when it is present.
    pid_v2 = sps_package.scielo_pid_v2
    new_issns = ISSNs.get(pid_v2[1:10]) if ISSNs and pid_v2 else None
    if new_issns:
        sps_package.fix("issns", new_issns, silently=True)

    SPS_PKG_PATH = config.get("SPS_PKG_PATH")
    INCOMPLETE_SPS_PKG_PATH = config.get("INCOMPLETE_SPS_PKG_PATH")

    pkg_path = os.path.join(SPS_PKG_PATH, original_filename)
    incomplete_pkg_path = os.path.join(INCOMPLETE_SPS_PKG_PATH,
                                       original_filename)

    # Passing through a set drops duplicated replacement entries.
    asset_replacements = list(set(sps_package.replace_assets_names()))
    logger.debug("%s possui %s ativos digitais", file_xml_path,
                 len(asset_replacements))

    # NOTE(review): get_source_json still receives the PID even when it is
    # missing; confirm that helper tolerates a falsy value.
    source_json = get_source_json(sps_package.scielo_pid_v2)
    renditions, renditions_metadata = source_json.get_renditions_metadata()
    logger.debug("%s possui %s renditions", file_xml_path, len(renditions))

    package_path = packing_assets(
        asset_replacements + renditions,
        pkg_path,
        incomplete_pkg_path,
        sps_package.package_name,
        sps_package.scielo_pid_v2,
    )

    files.write_file(os.path.join(package_path, "manifest.json"),
                     json.dumps(renditions_metadata))
    xml.objXML2file(
        os.path.join(package_path, "%s.xml" % (sps_package.package_name)),
        obj_xml)
Exemple #5
0
class Test_SPS_Package(unittest.TestCase):
    """Check ``SPS_Package`` href discovery, asset renaming and renditions."""

    def setUp(self):
        """Build an ``SPS_Package`` named "a01" from an in-memory XML tree."""
        article_xml = """<root xmlns:xlink="http://www.w3.org/1999/xlink">
                <inline-graphic xlink:href="a01tab01.gif"/>
                <graphic xlink:href="a01f01.gif"/>
                <ext-link xlink:href="a01tab02.gif"/>
                <ext-link xlink:href="mailto:a01f02.gif"/>
                <inline-supplementary-material xlink:href="a01tab03.gif"/>
                <supplementary-material xlink:href="a01tab04.gif"/>
                <media xlink:href="a01tab04.gif"/>
            </root>
            """
        self.sps_package = SPS_Package(etree.fromstring(article_xml), "a01")

    def test_elements_which_has_xlink_href(self):
        """Seven href-bearing nodes are found, with tags in sorted order."""
        items = list(self.sps_package.elements_which_has_xlink_href)
        self.assertEqual(len(items), 7)
        # The expectation is the alphabetically sorted tag list, not the
        # document order of the fixture.
        self.assertEqual(
            [node.tag for node in items],
            sorted([
                "inline-graphic",
                "graphic",
                "ext-link",
                "ext-link",
                "inline-supplementary-material",
                "supplementary-material",
                "media",
            ]),
        )

    def test_replace_assets(self):
        """Six (old name, new name) pairs are produced, in the listed order."""
        # The "mailto:" ext-link from the fixture has no entry here.
        expected = [
            ("a01tab02.gif", "a01-gtab02"),
            ("a01f01.gif", "a01-gf01"),
            ("a01tab01.gif", "a01-gtab01"),
            ("a01tab03.gif", "a01-gtab03"),
            ("a01tab04.gif", "a01-gtab04"),
            ("a01tab04.gif", "a01-gtab04"),
        ]
        items = self.sps_package.replace_assets_names()
        self.assertEqual(len(items), 6)
        for i, item in enumerate(items):
            with self.subTest(i):
                self.assertEqual(expected[i][0], item[0])
                self.assertEqual(expected[i][1], item[1])

    @mock.patch("documentstore_migracao.export.sps_package.article.get_article"
                )
    def test_get_renditions_metadata_no_renditions(self, mk_get_article):
        """An article without fulltexts yields no renditions and no metadata."""
        mk_article = mock.Mock()
        mk_article.fulltexts.return_value = {}
        mk_get_article.return_value = mk_article
        renditions, renditions_metadata = (
            self.sps_package.get_renditions_metadata()
        )
        self.assertEqual(renditions, [])
        self.assertEqual(renditions_metadata, {})

    @mock.patch("documentstore_migracao.export.sps_package.article.get_article"
                )
    def test_get_renditions_metadata(self, mk_get_article):
        """Only the PDF fulltexts are reported as renditions and metadata."""
        fulltexts = {
            "pdf": {
                "en": "http://www.scielo.br/pdf/aa/v1n1/a01.pdf",
                "pt": "http://www.scielo.br/pdf/aa/v1n1/pt_a01.pdf",
            },
            "html": {
                "en":
                "http://www.scielo.br/scielo.php?script=sci_arttext&tlng=en",
                "pt":
                "http://www.scielo.br/scielo.php?script=sci_arttext&tlng=pt",
            },
        }
        mk_article = mock.Mock()
        mk_article.fulltexts.return_value = fulltexts
        mk_get_article.return_value = mk_article
        renditions, renditions_metadata = (
            self.sps_package.get_renditions_metadata()
        )
        # Assert once on the whole result. Looping over the "pdf" dict would
        # iterate its keys and repeat the same checks for every language.
        self.assertEqual(renditions, [
            ('http://www.scielo.br/pdf/aa/v1n1/a01.pdf', 'a01'),
            ('http://www.scielo.br/pdf/aa/v1n1/pt_a01.pdf', 'pt_a01'),
        ])
        self.assertEqual(
            renditions_metadata, {
                'en': 'http://www.scielo.br/pdf/aa/v1n1/a01.pdf',
                'pt': 'http://www.scielo.br/pdf/aa/v1n1/pt_a01.pdf',
            })