def transform(self, item):
    """Attach a ``ListIdentifiers`` section (one header per book, then a resumption token)."""
    xml, data = item
    section = etree.SubElement(xml, 'ListIdentifiers')

    header_ppl = plumber.Pipeline(
        HeaderPipe()
    )
    pairs = ((section, book) for book in data.get('books'))
    # The pipes append to ``section`` as a side effect; drain the generator to run them.
    for _ in header_ppl.run(pairs):
        pass

    token_ppl = plumber.Pipeline(
        ResumptionTokenPipe()
    )
    for _ in token_ppl.run([[section, data]]):
        pass

    return (xml, data)
def pipeline_sci(self):
    """Serialize the wrapped raw article through the SciELO XML export pipeline."""
    article = Article(self._article)
    stages = (
        export_sci.SetupArticlePipe(),
        export_sci.XMLArticlePipe(),
        export_sci.XMLFrontPipe(),
        export_sci.XMLJournalMetaJournalIdPipe(),
        export_sci.XMLJournalMetaJournalTitleGroupPipe(),
        export_sci.XMLJournalMetaISSNPipe(),
        export_sci.XMLJournalMetaCollectionPipe(),
        export_sci.XMLJournalMetaPublisherPipe(),
        export_sci.XMLArticleMetaUniqueArticleIdPipe(),
        export_sci.XMLArticleMetaArticleIdPublisherPipe(),
        export_sci.XMLArticleMetaArticleIdDOIPipe(),
        export_sci.XMLArticleMetaArticleCategoriesPipe(),
        export_sci.XMLArticleMetaTitleGroupPipe(),
        export_sci.XMLArticleMetaTranslatedTitleGroupPipe(),
        export_sci.XMLArticleMetaContribGroupPipe(),
        export_sci.XMLArticleMetaAffiliationPipe(),
        export_sci.XMLArticleMetaDatesInfoPipe(),
        export_sci.XMLArticleMetaIssueInfoPipe(),
        export_sci.XMLArticleMetaElocationInfoPipe(),
        export_sci.XMLArticleMetaPagesInfoPipe(),
        export_sci.XMLArticleMetaPermissionPipe(),
        export_sci.XMLArticleMetaURLsPipe(),
        export_sci.XMLArticleMetaAbstractsPipe(),
        export_sci.XMLArticleMetaKeywordsPipe(),
        export_sci.XMLArticleMetaCitationsPipe(),
        export_sci.XMLClosePipe(),
    )
    transformed = plumber.Pipeline(*stages).run(article, rewrap=True)
    return next(transformed)
def pipeline_pubmed(self):
    """Serialize the wrapped raw article through the PubMed XML export pipeline."""
    article = Article(self._article, iso_format='iso 639-2')
    stages = (
        export_pubmed.SetupArticleSetPipe(),
        export_pubmed.XMLArticlePipe(),
        export_pubmed.XMLJournalPipe(),
        export_pubmed.XMLPublisherNamePipe(),
        export_pubmed.XMLJournalTitlePipe(),
        export_pubmed.XMLISSNPipe(),
        export_pubmed.XMLVolumePipe(),
        export_pubmed.XMLIssuePipe(),
        export_pubmed.XMLPubDatePipe(),
        export_pubmed.XMLReplacesPipe(),
        export_pubmed.XMLArticleTitlePipe(),
        export_pubmed.XMLFirstPagePipe(),
        export_pubmed.XMLLastPagePipe(),
        export_pubmed.XMLElocationIDPipe(),
        export_pubmed.XMLLanguagePipe(),
        export_pubmed.XMLAuthorListPipe(),
        export_pubmed.XMLPublicationTypePipe(),
        export_pubmed.XMLArticleIDListPipe(),
        export_pubmed.XMLHistoryPipe(),
        export_pubmed.XMLAbstractPipe(),
        export_pubmed.XMLClosePipe(),
    )
    transformed = plumber.Pipeline(*stages).run(article, rewrap=True)
    return next(transformed)
def pipeline_rsps(self):
    """Serialize the wrapped raw article through the SciELO PS (RSPS) XML export pipeline."""
    article = Article(self._article)
    stages = (
        export_rsps.SetupArticlePipe(),
        export_rsps.XMLArticlePipe(),
        export_rsps.XMLFrontPipe(),
        export_rsps.XMLJournalMetaJournalIdPipe(),
        export_rsps.XMLJournalMetaJournalTitleGroupPipe(),
        export_rsps.XMLJournalMetaISSNPipe(),
        export_rsps.XMLJournalMetaPublisherPipe(),
        export_rsps.XMLArticleMetaArticleIdPublisherPipe(),
        export_rsps.XMLArticleMetaArticleIdDOIPipe(),
        export_rsps.XMLArticleMetaArticleCategoriesPipe(),
        export_rsps.XMLArticleMetaTitleGroupPipe(),
        export_rsps.XMLArticleMetaTranslatedTitleGroupPipe(),
        export_rsps.XMLArticleMetaContribGroupPipe(),
        export_rsps.XMLArticleMetaAffiliationPipe(),
        export_rsps.XMLArticleMetaGeneralInfoPipe(),
        export_rsps.XMLArticleMetaHistoryPipe(),
        export_rsps.XMLArticleMetaPermissionPipe(),
        export_rsps.XMLArticleMetaAbstractsPipe(),
        export_rsps.XMLArticleMetaKeywordsPipe(),
        export_rsps.XMLArticleMetaCountsPipe(),
        export_rsps.XMLBodyPipe(),
        export_rsps.XMLArticleMetaCitationsPipe(),
        export_rsps.XMLSubArticlePipe(),
        export_rsps.XMLClosePipe(),
    )
    transformed = plumber.Pipeline(*stages).run(article, rewrap=True)
    return next(transformed)
def pipeline_doaj(self):
    """Serialize the wrapped raw article through the DOAJ XML export pipeline."""
    article = Article(self._article, iso_format='iso 639-2')
    stages = (
        export_doaj.SetupArticlePipe(),
        export_doaj.XMLArticlePipe(),
        export_doaj.XMLJournalMetaPublisherPipe(),
        export_doaj.XMLJournalMetaJournalTitlePipe(),
        export_doaj.XMLJournalMetaISSNPipe(),
        export_doaj.XMLArticleMetaPublicationDatePipe(),
        export_doaj.XMLArticleMetaVolumePipe(),
        export_doaj.XMLArticleMetaIssuePipe(),
        export_doaj.XMLArticleMetaStartPagePipe(),
        export_doaj.XMLArticleMetaEndPagePipe(),
        export_doaj.XMLArticleMetaArticleIdDOIPipe(),
        export_doaj.XMLArticleMetaIdPipe(),
        export_doaj.XMLArticleMetaDocumentTypePipe(),
        export_doaj.XMLArticleMetaTitlePipe(),
        export_doaj.XMLArticleMetaAuthorsPipe(),
        export_doaj.XMLArticleMetaAffiliationPipe(),
        export_doaj.XMLArticleMetaAbstractsPipe(),
        export_doaj.XMLArticleMetaFullTextUrlPipe(),
        export_doaj.XMLArticleMetaKeywordsPipe(),
        export_doaj.XMLClosePipe(),
    )
    transformed = plumber.Pipeline(*stages).run(article, rewrap=True)
    return next(transformed)
def pipeline_to_xml(self, article):
    """Transform *article* into a Solr ``<add>`` XML payload.

    :param article: document whose fields are rendered as XML nodes.
    :returns: the serialized ``<add>`` document as UTF-8 bytes.
    """
    ppl = plumber.Pipeline(
        pipeline_xml.SetupDocument(),
        pipeline_xml.DocumentID(),
        pipeline_xml.URL(),
        pipeline_xml.DOI(),
        pipeline_xml.Languages(),
        pipeline_xml.Fulltexts(),
        pipeline_xml.PublicationDate(),
        pipeline_xml.Keywords(),
        pipeline_xml.Collection(),
        pipeline_xml.DocumentType(),
        pipeline_xml.Titles(),
        pipeline_xml.Abstract(),
        pipeline_xml.Authors(),
        pipeline_xml.TearDown(),
    )

    # Root element that aggregates every node the pipeline emits.
    root = ET.Element('add')
    for node in ppl.run([article]):
        root.append(node)

    return ET.tostring(root, encoding="utf-8", method="xml")
def __init__(self):
    """Assemble the HTML clean-up pipeline applied to article bodies."""
    stages = (
        self.SetupPipe(),
        self.DeprecatedHTMLTagsPipe(),
        self.RemoveExcedingStyleTagsPipe(),
        self.RemoveEmptyPipe(),
        self.RemoveStyleAttributesPipe(),
        self.BRPipe(),
        self.PPipe(),
        self.DivPipe(),
        self.ImgPipe(),
        self.LiPipe(),
        self.OlPipe(),
        self.UlPipe(),
        self.IPipe(),
        self.EmPipe(),
        self.UPipe(),
        self.BPipe(),
        self.APipe(),
        self.StrongPipe(),
        self.TdCleanPipe(),
        self.BlockquotePipe(),
        self.HrPipe(),
        self.GraphicChildrenPipe(),
        self.RemoveCommentPipe(),
        self.BodyAllowedTagPipe(),
        self.HTMLEscapingPipe(),
        self.RemovePWhichIsParentOfPPipe(),
    )
    self._ppl = plumber.Pipeline(*stages)
def pipeline_crossref(self):
    """Serialize the wrapped raw article through the Crossref deposit XML pipeline."""
    article = CustomArticle(self._article)
    stages = (
        export_crossref.SetupDoiBatchPipe(),
        export_crossref.XMLHeadPipe(),
        export_crossref.XMLBodyPipe(),
        export_crossref.XMLDoiBatchIDPipe(),
        export_crossref.XMLTimeStampPipe(),
        export_crossref.XMLDepositorPipe(),
        export_crossref.XMLRegistrantPipe(),
        export_crossref.XMLJournalPipe(),
        export_crossref.XMLJournalMetadataPipe(),
        export_crossref.XMLJournalTitlePipe(),
        export_crossref.XMLAbbreviatedJournalTitlePipe(),
        export_crossref.XMLISSNPipe(),
        export_crossref.XMLJournalIssuePipe(),
        export_crossref.XMLPubDatePipe(),
        export_crossref.XMLVolumePipe(),
        export_crossref.XMLIssuePipe(),
        export_crossref.XMLJournalArticlePipe(),
        export_crossref.XMLArticleTitlesPipe(),
        export_crossref.XMLArticleTitlePipe(),
        export_crossref.XMLArticleContributorsPipe(),
        export_crossref.XMLArticleAbstractPipe(),
        export_crossref.XMLArticlePubDatePipe(),
        export_crossref.XMLPagesPipe(),
        export_crossref.XMLPIDPipe(),
        export_crossref.XMLPermissionsPipe(),
        export_crossref.XMLElocationPipe(),
        export_crossref.XMLDOIDataPipe(),
        export_crossref.XMLDOIPipe(),
        export_crossref.XMLResourcePipe(),
        export_crossref.XMLCollectionPipe(),
        export_crossref.XMLArticleCitationsPipe(),
        export_crossref.XMLClosePipe(),
    )
    transformed = plumber.Pipeline(*stages).run(article, rewrap=True)
    return next(transformed)
def make_record(record_data, formatter):
    """Build an OAI ``<record>`` element: header via the pipeline, metadata via *formatter*."""
    element = etree.Element('record')
    ppl = plumber.Pipeline(header)
    tree, _ = next(ppl.run((element, record_data), rewrap=True))
    tree.append(formatter(record_data))
    return tree
def __init__(self):
    """Assemble the small structural-fix pipeline for converted bodies."""
    stages = (
        self.SetupPipe(),
        self.GraphicInExtLink(),
        self.TableinBody(),
        self.TableinP(),
        self.AddPinFN(),
    )
    self._ppl = plumber.Pipeline(*stages)
def get_meta_ppl():
    """Return the pipeline that extracts the document's metadata fields."""
    return plumber.Pipeline(
        SetupPipe(),
        TitlePipe(),
        AbbrevJournalTitlePipe(),
        AbstractPipe(),
        JournalIDPipe(),
        AuthorPipe(),
        AffiliationPipe(),
        KeywordPipe(),
        DefaultLanguagePipe(),
        LpagePipe(),
        FpagePipe(),
        JournalTitlePipe(),
        VolumePipe(),
        NumberPipe(),
        PubDatePipe(),
        ISSNPipe(),
        PublisherNamePipe(),
        SubjectPipe(),
        PublisherIDPipe(),
        TearDownPipe(),
    )
def __init__(self):
    """Assemble the element-citation conversion pipeline."""
    stages = (
        self.SetupCitationPipe(),
        self.RefIdPipe(),
        self.ElementCitationPipe(),
        self.ArticleTitlePipe(),
        self.ThesisTitlePipe(),
        self.LinkTitlePipe(),
        self.SourcePipe(),
        self.DatePipe(),
        self.StartPagePipe(),
        self.EndPagePipe(),
        self.IssuePipe(),
        self.VolumePipe(),
        self.PersonGroupPipe(),
        self.URIPipe(),
    )
    self._ppl = plumber.Pipeline(*stages)
def __init__(self):
    """Assemble the citation-metadata extraction pipeline."""
    stages = (
        self.SetupCitationPipe(),
        self.CitationIdPipe(),
        self.ISSNPipe(),
        self.JournalTitlePipe(),
        self.ThesisTitlePipe(),
        self.AuthorPipe(),
        self.VolumePipe(),
        self.IssuePipe(),
        self.StartPagePipe(),
        self.DatePipe(),
        self.ArticleTitlePipe(),
        self.ISBNPipe(),
        self.SeriesTitlePipe(),
        self.VolumeTitlePipe(),
        self.EditionPipe(),
    )
    self._ppl = plumber.Pipeline(*stages)
def __str__(self):
    """Serialize the Identify response to its XML string form."""
    # NOTE: local must not be named ``pipeline`` — that would shadow the module.
    stages = (
        pipeline.SetupPipe(),
        pipeline.ResponseDatePipe(),
        pipeline.RequestPipe(),
        pipeline.IdentifyNodePipe(),
        pipeline.TearDownPipe(),
    )
    output = plumber.Pipeline(*stages).run([self.data])
    return next(output)
def transform(self, item):
    """Attach a ``GetRecord`` section containing the single requested record.

    :param item: ``(xml, data)`` tuple; ``data['books']`` holds the record source.
    :returns: the same ``(xml, data)`` tuple with the section appended.

    Bug fix: ``results.next()`` is the Python 2-only generator method;
    generators expose no ``next()`` method in Python 3, so this raised
    ``AttributeError`` at runtime.  Use the builtin ``next()`` instead,
    consistent with every other function in this module.
    """
    xml, data = item
    sub = etree.SubElement(xml, 'GetRecord')

    ppl = plumber.Pipeline(
        RecordPipe(),
    )

    results = ppl.run(data.get('books'))

    # builtin next() works on any iterator in both Python 2 and 3
    record = next(results)

    sub.append(record)

    return (xml, data)
def __str__(self):
    """Serialize the metadata-format error response to its XML string form."""
    # NOTE: local must not be named ``pipeline`` — that would shadow the module.
    stages = (
        pipeline.SetupPipe(),
        pipeline.ResponseDatePipe(),
        pipeline.RequestPipe(),
        pipeline.MetadataFormatErrorPipe(),
        pipeline.TearDownPipe(),
    )
    output = plumber.Pipeline(*stages).run([self.data])
    return next(output)
def transform(self, item):
    """Attach a ``ListRecords`` section (one record per book, then a resumption token)."""
    xml, data = item
    section = etree.SubElement(xml, 'ListRecords')

    record_ppl = plumber.Pipeline(
        RecordPipe(),
    )
    for record in record_ppl.run(data.get('books')):
        section.append(record)

    token_ppl = plumber.Pipeline(
        ResumptionTokenPipe()
    )
    # The token pipe appends to ``section`` as a side effect; drain the generator.
    for _ in token_ppl.run([[section, data]]):
        pass

    return (xml, data)
def transform(self, data):
    """Build a full OAI ``<record>`` element (header + metadata) for *data*."""
    record = etree.Element('record')
    ppl = plumber.Pipeline(
        HeaderPipe(),
        MetadataPipe()
    )
    # The pipes mutate ``record`` in place; drain the generator to execute them.
    for _ in ppl.run([(record, data)]):
        pass
    return record
def transform(self, item):
    """Attach a ``ListSets`` section (one entry per set, then a resumption token)."""
    xml, data = item
    section = etree.SubElement(xml, 'ListSets')

    set_ppl = plumber.Pipeline(
        SetPipe()
    )
    for set_element in set_ppl.run(data.get('books')):
        section.append(set_element)

    token_ppl = plumber.Pipeline(
        ResumptionTokenPipe()
    )
    # The token pipe appends to ``section`` as a side effect; drain the generator.
    for _ in token_ppl.run([[section, data]]):
        pass

    return (xml, data)
def pipeline_to_xml(self, article):
    """Transform *article* into a Solr ``<add>`` XML payload.

    :param article: document whose fields are rendered as XML nodes.
    :returns: the serialized ``<add>`` document as UTF-8 bytes.
    """
    ppl = plumber.Pipeline(
        pipeline_xml.SetupDocument(),
        pipeline_xml.DocumentID(),
        pipeline_xml.DOI(),
        pipeline_xml.Collection(),
        pipeline_xml.DocumentType(),
        pipeline_xml.URL(),
        pipeline_xml.Authors(),
        pipeline_xml.Titles(),
        pipeline_xml.OriginalTitle(),
        pipeline_xml.Pages(),
        pipeline_xml.WOKCI(),
        pipeline_xml.WOKSC(),
        pipeline_xml.JournalAbbrevTitle(),
        pipeline_xml.Languages(),
        pipeline_xml.AvailableLanguages(),
        pipeline_xml.Fulltexts(),
        pipeline_xml.PublicationDate(),
        pipeline_xml.SciELOPublicationDate(),
        pipeline_xml.SciELOProcessingDate(),
        pipeline_xml.Abstract(),
        pipeline_xml.AffiliationCountry(),
        pipeline_xml.AffiliationInstitution(),
        pipeline_xml.Sponsor(),
        pipeline_xml.Volume(),
        pipeline_xml.SupplementVolume(),
        pipeline_xml.Issue(),
        pipeline_xml.SupplementIssue(),
        pipeline_xml.ElocationPage(),
        pipeline_xml.StartPage(),
        pipeline_xml.EndPage(),
        pipeline_xml.JournalTitle(),
        pipeline_xml.IsCitable(),
        pipeline_xml.Permission(),
        pipeline_xml.Keywords(),
        pipeline_xml.JournalISSNs(),
        pipeline_xml.SubjectAreas(),
        pipeline_xml.ReceivedCitations(),
        pipeline_xml.TearDown(),
    )

    # Root element that aggregates every node the pipeline emits.
    root = ET.Element('add')
    for node in ppl.run([article]):
        root.append(node)

    return ET.tostring(root, encoding="utf-8", method="xml")
def listidentifiers(item):
    """Pipe: append a ``ListIdentifiers`` element (headers + resumption token) to the XML."""
    xml, data = item
    section = etree.SubElement(xml, 'ListIdentifiers')

    pairs = (
        (section, resource)
        for resource in data.get('resources', [])
    )
    # The header pipe appends to ``section`` as a side effect; drain the generator.
    for _ in plumber.Pipeline(header).run(pairs):
        pass

    section.append(
        make_resumptiontoken(data.get('resumptionToken', '')))

    return item
def __init__(self, pid, index_body=1):
    """Build the HTML-to-XML conversion pipeline for one document body.

    :param pid: identifier of the document being converted.
    :param index_body: index of the body being processed (defaults to 1).
    """
    self.pid = pid
    self.index_body = index_body
    stages = (
        self.SetupPipe(super_obj=self),
        self.SaveRawBodyPipe(super_obj=self),
        self.DeprecatedHTMLTagsPipe(),
        self.RemoveExcedingStyleTagsPipe(),
        self.RemoveEmptyPipe(),
        self.RemoveStyleAttributesPipe(),
        self.RemoveCommentPipe(),
        self.HTMLEscapingPipe(),
        self.BRPipe(),
        self.PPipe(),
        self.DivPipe(),
        self.ANamePipe(super_obj=self),
        self.TablePipe(),
        self.ImgPipe(),
        self.LiPipe(),
        self.OlPipe(),
        self.UlPipe(),
        self.IPipe(),
        self.EmPipe(),
        self.UPipe(),
        self.BPipe(),
        self.APipe(),
        self.StrongPipe(),
        self.TdCleanPipe(),
        self.TableCleanPipe(),
        self.BlockquotePipe(),
        self.HrPipe(),
        self.GraphicChildrenPipe(),
        self.RemovePWhichIsParentOfPPipe(),
        self.RemoveRefIdPipe(),
        self.SanitizationPipe(),
    )
    self._ppl = plumber.Pipeline(*stages)
def serialize_list_metadata_formats(data):
    """Serialize an OAI-PMH ``ListMetadataFormats`` response to bytes."""
    stages = (root, responsedate, request, listmetadataformats, tobytes)
    return next(plumber.Pipeline(*stages).run(data, rewrap=True))
def serialize_bad_argument(data):
    """Serialize an OAI-PMH ``badArgument`` error response to bytes."""
    stages = (root, responsedate, request, badargument, tobytes)
    return next(plumber.Pipeline(*stages).run(data, rewrap=True))
def StyleCheckingPipeline():
    """Factory for style checking pipelines."""
    stages = (setup, funding_group, doctype, country_code, teardown)
    return plumber.Pipeline(*stages)
def serialize_id_does_not_exist(data):
    """Serialize an OAI-PMH ``idDoesNotExist`` error response to bytes."""
    stages = (root, responsedate, request, iddoesnotexist, tobytes)
    return next(plumber.Pipeline(*stages).run(data, rewrap=True))
def serialize_cannot_disseminate_format(data):
    """Serialize an OAI-PMH ``cannotDisseminateFormat`` error response to bytes."""
    stages = (root, responsedate, request, cannotdisseminateformat, tobytes)
    return next(plumber.Pipeline(*stages).run(data, rewrap=True))
def serialize_bad_resumption_token(data):
    """Serialize an OAI-PMH ``badResumptionToken`` error response to bytes."""
    stages = (root, responsedate, request, badresumptiontoken, tobytes)
    return next(plumber.Pipeline(*stages).run(data, rewrap=True))
def serialize_no_records_match(data):
    """Serialize an OAI-PMH ``noRecordsMatch`` error response to bytes."""
    stages = (root, responsedate, request, norecordsmatch, tobytes)
    return next(plumber.Pipeline(*stages).run(data, rewrap=True))
def serialize_list_identifiers(data):
    """Serialize an OAI-PMH ``ListIdentifiers`` response to bytes."""
    stages = (root, responsedate, request, listidentifiers, tobytes)
    return next(plumber.Pipeline(*stages).run(data, rewrap=True))