Exemple #1
0
 def get_references(self, xml):
     """Extract the bibliography of ``xml`` into ``self.references``.

     Each ``<ref>`` element found in ``xml`` yields one tuple laid out as::

         (label, authors, doi, issue, page, page_last, title, volume,
          year, ext_link, plain_text)

     * ``label`` has leading/trailing dots stripped.
     * ``authors`` is a list with one entry per ``<name>`` element,
       formatted "surname, given-names" when given-names is non-empty,
       else the bare surname, else the ``<string-name>`` value when the
       result would be blank.
     * ``doi`` is the text of the last ``<pub-id pub-id-type="doi">``
       element, or ``""`` when there is none.
     * ``ext_link`` is the parent class's ``get_ref_link(reference,
       "arxiv")`` passed through ``format_arxiv_id``.
     * ``plain_text`` is the ``<mixed-citation>`` value for every
       reference whose ``<citation>`` publication type is not
       ``journal`` (including references without a ``<citation>``),
       and ``None`` for journal references.

     ``get_value_in_tag``, ``xml_to_text`` and ``format_arxiv_id`` are
     defined outside this method; presumably the first returns the text
     of the first matching descendant tag -- confirm against its
     definition.

     Nothing is returned; the list is stored on ``self.references``.
     """
     references = []
     for reference in xml.getElementsByTagName("ref"):
         # A <ref> without a <citation> child has no publication type;
         # treat it as non-journal instead of failing with IndexError.
         citations = reference.getElementsByTagName('citation')
         # Keep the attribute as text: encoding it to bytes makes the
         # comparison with 'journal' below always unequal on Python 3.
         if citations:
             ref_type = citations[0].getAttribute('publication-type')
         else:
             ref_type = ''
         label = get_value_in_tag(reference, "label").strip('.')
         authors = []
         for author in reference.getElementsByTagName("name"):
             given_name = get_value_in_tag(author, "given-names")
             surname = get_value_in_tag(author, "surname")
             if given_name:
                 name = "%s, %s" % (surname, given_name)
             else:
                 name = surname
             # Blank structured name: fall back to the free-form one.
             if not name.strip():
                 name = get_value_in_tag(author, "string-name")
             authors.append(name)
         doi = ""
         for tag in reference.getElementsByTagName("pub-id"):
             if tag.getAttribute("pub-id-type") == "doi":
                 doi = xml_to_text(tag)
         issue = get_value_in_tag(reference, "issue")
         page = get_value_in_tag(reference, "fpage")
         page_last = get_value_in_tag(reference, "lpage")
         title = get_value_in_tag(reference, "source")
         volume = get_value_in_tag(reference, "volume")
         year = get_value_in_tag(reference, "year")
         ext_link = format_arxiv_id(super(NLMParser, self).get_ref_link(reference, "arxiv"))
         plain_text = None
         if ref_type != 'journal':
             plain_text = get_value_in_tag(reference, "mixed-citation")
         references.append((label, authors, doi, issue, page, page_last, title, volume, year, ext_link, plain_text))
     self.references = references
Exemple #2
0
 def get_references(self, xml):
     """Build the list of bibliographic references found in ``xml``.

     One tuple is produced per ``ce:bib-reference`` element, laid out as
     ``(label, authors, doi, issue, page, title, volume, year, textref,
     ext_link)``.

     Authors are rendered as "surname, given-name", or as the bare
     surname when the given name is empty.  ``year`` is the first four
     characters of the ``sb:date`` value read from the first ``sb:issue``
     element, or ``None`` when the reference has no ``sb:issue``.
     ``ext_link`` is ``self.get_ref_link(reference, 'arxiv')`` passed
     through ``format_arxiv_id``.

     Returns the list of tuples.
     """
     collected = []
     for entry in xml.getElementsByTagName("ce:bib-reference"):
         ref_label = get_value_in_tag(entry, "ce:label")

         author_names = []
         for person in entry.getElementsByTagName("sb:author"):
             first = get_value_in_tag(person, "ce:given-name")
             last = get_value_in_tag(person, "ce:surname")
             author_names.append(("%s, %s" % (last, first)) if first else last)

         ref_doi = get_value_in_tag(entry, "ce:doi")
         ref_issue = get_value_in_tag(entry, "sb:issue")
         first_page = get_value_in_tag(entry, "sb:first-page")
         main_title = get_value_in_tag(entry, "sb:maintitle")
         volume_nr = get_value_in_tag(entry, "sb:volume-nr")

         # The publication date is read from the first sb:issue element.
         issue_nodes = entry.getElementsByTagName('sb:issue')
         pub_year = get_value_in_tag(issue_nodes[0], "sb:date")[:4] if issue_nodes else None

         raw_text = get_value_in_tag(entry, "ce:textref")
         arxiv_link = format_arxiv_id(self.get_ref_link(entry, 'arxiv'))

         record = (
             ref_label,
             author_names,
             ref_doi,
             ref_issue,
             first_page,
             main_title,
             volume_nr,
             pub_year,
             raw_text,
             arxiv_link,
         )
         collected.append(record)
     return collected