def pipeline_to_xml(self, article):
    """
    Run ``article`` through the XML pipeline and serialize the outcome.

    :param article: the document to convert; it is handed to the pipeline
        as a single-item list.
    :returns: the ``ET.tostring`` output (UTF-8 encoded XML) of an ``add``
        root element holding every element produced by the pipeline.
    """
    # The pipes are applied in the order listed here.
    steps = [
        pipeline_xml.SetupDocument(),
        pipeline_xml.DocumentID(),
        pipeline_xml.URL(),
        pipeline_xml.DOI(),
        pipeline_xml.Languages(),
        pipeline_xml.Fulltexts(),
        pipeline_xml.PublicationDate(),
        pipeline_xml.Keywords(),
        pipeline_xml.Collection(),
        pipeline_xml.DocumentType(),
        pipeline_xml.Titles(),
        pipeline_xml.Abstract(),
        pipeline_xml.Authors(),
        pipeline_xml.TearDown(),
    ]
    documents = plumber.Pipeline(*steps).run([article])

    # Wrap every generated document under a single <add> root element.
    root = ET.Element('add')
    for document in documents:
        root.append(document)

    return ET.tostring(root, encoding="utf-8", method="xml")
def test_transform(self):
    # The sample record carries nine dc:subject entries: three tagged
    # es-ES, two pt-BR and four fr-FR. The test checks that the number of
    # keyword_<lang> fields produced by Keywords().transform matches.
    text = (
        '<root xmlns:dc="http://www.openarchives.org/OAI/2.0/provenance"> '
        '<record> '
        '<metadata> '
        '<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" '
        'xmlns:dc="http://purl.org/dc/elements/1.1/" '
        'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        'xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ '
        'http://www.openarchives.org/OAI/2.0/oai_dc.xsd"> '
        '<dc:subject xml:lang="es-ES">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject> '
        '<dc:subject xml:lang="es-ES">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject> '
        '<dc:subject xml:lang="pt-BR">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject> '
        '<dc:subject xml:lang="fr-FR">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject> '
        '<dc:subject xml:lang="fr-FR">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject> '
        '<dc:subject xml:lang="fr-FR">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject> '
        '<dc:subject xml:lang="es-ES">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject> '
        '<dc:subject xml:lang="pt-BR">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject> '
        '<dc:subject xml:lang="fr-FR">COVID-19 in Brazil: advantages of a socialized unified health system and preparation to contain cases</dc:subject> '
        '</oai_dc:dc> '
        '</metadata> '
        '</record> '
        '</root> '
    )

    target = ET.Element("doc")
    source = ET.fromstring(text)
    data = source, target

    raw, xml = pipeline_xml.Keywords().transform(data)

    # (expected number of fields, XPath selecting them), checked in order.
    expectations = (
        (3, ".//field[@name='keyword_es']"),
        (2, ".//field[@name='keyword_pt']"),
        (4, ".//field[@name='keyword_fr']"),
    )
    for expected_count, xpath in expectations:
        self.assertEqual(expected_count, len(xml.findall(xpath)))
def test_keywords(self):
    # Keywords().transform receives the article fixture together with an
    # empty <doc> element; the keyword_pt fields found afterwards are
    # compared, in sorted order, with the expected Portuguese keywords.
    expected = [
        u'Insuficiência Renal Crônica',
        u'Registros de Mortalidade',
        u'Sistemas de Informação Hospitalar',
        u'Terapia de Substituição Renal',
    ]

    document = ET.Element('doc')
    keywords_pipe = pipeline_xml.Keywords()
    raw, xml = keywords_pipe.transform([self._article_meta, document])

    # Sorting makes the comparison independent of the order of the fields.
    fields = xml.findall('./field[@name="keyword_pt"]')
    result = sorted(field.text for field in fields)

    self.assertEqual(expected, result)
def pipeline_to_xml(self, article):
    """
    Run ``article`` through the XML pipeline and serialize the outcome.

    :param article: the document to convert; it is handed to the pipeline
        as a single-item list.
    :returns: the ``ET.tostring`` output (UTF-8 encoded XML) of an ``add``
        root element holding every element produced by the pipeline.
    """
    # The pipes are applied in the order listed here.
    steps = [
        pipeline_xml.SetupDocument(),
        pipeline_xml.DocumentID(),
        pipeline_xml.DOI(),
        pipeline_xml.Collection(),
        pipeline_xml.DocumentType(),
        pipeline_xml.URL(),
        pipeline_xml.Authors(),
        pipeline_xml.Titles(),
        pipeline_xml.OriginalTitle(),
        pipeline_xml.Pages(),
        pipeline_xml.WOKCI(),
        pipeline_xml.WOKSC(),
        pipeline_xml.JournalAbbrevTitle(),
        pipeline_xml.Languages(),
        pipeline_xml.AvailableLanguages(),
        pipeline_xml.Fulltexts(),
        pipeline_xml.PublicationDate(),
        pipeline_xml.SciELOPublicationDate(),
        pipeline_xml.SciELOProcessingDate(),
        pipeline_xml.Abstract(),
        pipeline_xml.AffiliationCountry(),
        pipeline_xml.AffiliationInstitution(),
        pipeline_xml.Sponsor(),
        pipeline_xml.Volume(),
        pipeline_xml.SupplementVolume(),
        pipeline_xml.Issue(),
        pipeline_xml.SupplementIssue(),
        pipeline_xml.ElocationPage(),
        pipeline_xml.StartPage(),
        pipeline_xml.EndPage(),
        pipeline_xml.JournalTitle(),
        pipeline_xml.IsCitable(),
        pipeline_xml.Permission(),
        pipeline_xml.Keywords(),
        pipeline_xml.JournalISSNs(),
        pipeline_xml.SubjectAreas(),
        pipeline_xml.ReceivedCitations(),
        pipeline_xml.TearDown(),
    ]
    documents = plumber.Pipeline(*steps).run([article])

    # Wrap every generated document under a single <add> root element.
    root = ET.Element('add')
    for document in documents:
        root.append(document)

    return ET.tostring(root, encoding="utf-8", method="xml")
def test_without_keywords(self):
    # With the 'v85' entry removed from the article fixture, the
    # transformed document must contain no keyword_pt field at all.
    del self._article_meta.data['article']['v85']

    document = ET.Element('doc')
    keywords_pipe = pipeline_xml.Keywords()
    raw, xml = keywords_pipe.transform([self._article_meta, document])

    # find() returns None when nothing matches the path.
    missing = xml.find('./field[@name="keyword_pt"]')

    self.assertEqual(None, missing)