def pipeline_to_xml(self, article):
        """
        Pipeline to tranform a dictionary to XML format

        :param list_dict: List of dictionary content key tronsform in a XML.
        """

        ppl = plumber.Pipeline(pipeline_xml.SetupDocument(),
                               pipeline_xml.DocumentID(), pipeline_xml.URL(),
                               pipeline_xml.DOI(), pipeline_xml.Languages(),
                               pipeline_xml.Fulltexts(),
                               pipeline_xml.PublicationDate(),
                               pipeline_xml.Keywords(),
                               pipeline_xml.Collection(),
                               pipeline_xml.DocumentType(),
                               pipeline_xml.Titles(), pipeline_xml.Abstract(),
                               pipeline_xml.Authors(), pipeline_xml.TearDown())

        xmls = ppl.run([article])

        # Add root document
        add = ET.Element('add')

        for xml in xmls:
            add.append(xml)

        return ET.tostring(add, encoding="utf-8", method="xml")
Exemple #2
0
 def test_transform(self):
     text = """<root xmlns:dc="http://www.openarchives.org/OAI/2.0/provenance">
     <record>
         <metadata>
             <oai_dc:dc
                 xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
                 xmlns:dc="http://purl.org/dc/elements/1.1/"
                 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                 xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/
                 http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
                 <dc:subject xml:lang="es-ES">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject>
                 <dc:subject xml:lang="es-ES">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject>
                 <dc:subject xml:lang="pt-BR">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject>
                 <dc:subject xml:lang="fr-FR">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject>
                 <dc:subject xml:lang="fr-FR">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject>
                 <dc:subject xml:lang="fr-FR">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject>
                 <dc:subject xml:lang="es-ES">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject>
                 <dc:subject xml:lang="pt-BR">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject>
                 <dc:subject xml:lang="fr-FR">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject>
             </oai_dc:dc>
         </metadata>
     </record>
     </root>
     """
     xml = ET.Element("doc")
     raw = ET.fromstring(text)
     data = raw, xml
     raw, xml = pipeline_xml.Keywords().transform(data)
     self.assertEqual(3, len(xml.findall(".//field[@name='keyword_es']")))
     self.assertEqual(2, len(xml.findall(".//field[@name='keyword_pt']")))
     self.assertEqual(4, len(xml.findall(".//field[@name='keyword_fr']")))
    def test_keywords(self):

        pxml = ET.Element('doc')

        data = [self._article_meta, pxml]

        xmlarticle = pipeline_xml.Keywords()

        raw, xml = xmlarticle.transform(data)

        result = sorted([i.text for i in xml.findall('./field[@name="keyword_pt"]')])

        self.assertEqual([u'Insuficiência Renal Crônica', u'Registros de Mortalidade', u'Sistemas de Informação Hospitalar', u'Terapia de Substituição Renal'], result)
Exemple #4
0
    def pipeline_to_xml(self, article):
        """
        Pipeline to tranform a dictionary to XML format

        :param list_dict: List of dictionary content key tronsform in a XML.
        """

        ppl = plumber.Pipeline(pipeline_xml.SetupDocument(),
                               pipeline_xml.DocumentID(), pipeline_xml.DOI(),
                               pipeline_xml.Collection(),
                               pipeline_xml.DocumentType(), pipeline_xml.URL(),
                               pipeline_xml.Authors(), pipeline_xml.Titles(),
                               pipeline_xml.OriginalTitle(),
                               pipeline_xml.Pages(), pipeline_xml.WOKCI(),
                               pipeline_xml.WOKSC(),
                               pipeline_xml.JournalAbbrevTitle(),
                               pipeline_xml.Languages(),
                               pipeline_xml.AvailableLanguages(),
                               pipeline_xml.Fulltexts(),
                               pipeline_xml.PublicationDate(),
                               pipeline_xml.SciELOPublicationDate(),
                               pipeline_xml.SciELOProcessingDate(),
                               pipeline_xml.Abstract(),
                               pipeline_xml.AffiliationCountry(),
                               pipeline_xml.AffiliationInstitution(),
                               pipeline_xml.Sponsor(), pipeline_xml.Volume(),
                               pipeline_xml.SupplementVolume(),
                               pipeline_xml.Issue(),
                               pipeline_xml.SupplementIssue(),
                               pipeline_xml.ElocationPage(),
                               pipeline_xml.StartPage(),
                               pipeline_xml.EndPage(),
                               pipeline_xml.JournalTitle(),
                               pipeline_xml.IsCitable(),
                               pipeline_xml.Permission(),
                               pipeline_xml.Keywords(),
                               pipeline_xml.JournalISSNs(),
                               pipeline_xml.SubjectAreas(),
                               pipeline_xml.ReceivedCitations(),
                               pipeline_xml.TearDown())

        xmls = ppl.run([article])

        # Add root document
        add = ET.Element('add')

        for xml in xmls:
            add.append(xml)

        return ET.tostring(add, encoding="utf-8", method="xml")
    def test_without_keywords(self):

        pxml = ET.Element('doc')

        del(self._article_meta.data['article']['v85'])
        data = [self._article_meta, pxml]

        xmlarticle = pipeline_xml.Keywords()

        raw, xml = xmlarticle.transform(data)

        result = xml.find('./field[@name="keyword_pt"]')

        self.assertEqual(None, result)