def get_references(self, xml):
    """Extract every ``<ref>`` element of ``xml`` into ``self.references``.

    Each reference is stored as an 11-tuple::

        (label, authors, doi, issue, page, page_last, title, volume,
         year, ext_link, plain_text)

    ``authors`` is a list of names formatted as ``"surname, given-names"``
    (or just the surname when no given name is present); when that
    yields only whitespace the ``string-name`` value is used instead.
    ``plain_text`` holds the ``mixed-citation`` value for every reference
    whose citation ``publication-type`` is not ``journal`` and is ``None``
    otherwise.

    :param xml: DOM node (document or element) exposing
        ``getElementsByTagName``.

    The result is stored on ``self.references``; nothing is returned.
    """
    references = []
    for reference in xml.getElementsByTagName("ref"):
        plain_text = None

        # A <ref> without a <citation> descendant used to raise IndexError
        # and abort the whole extraction. Treat it as a non-journal
        # reference so its plain text is still picked up below.
        citations = reference.getElementsByTagName('citation')
        if citations:
            # Compare the attribute as text: encoding it to UTF-8 first
            # yields ``bytes`` on Python 3, which never equals 'journal'.
            ref_type = citations[0].getAttribute('publication-type')
        else:
            ref_type = ''

        label = get_value_in_tag(reference, "label").strip('.')

        authors = []
        for author in reference.getElementsByTagName("name"):
            given_name = get_value_in_tag(author, "given-names")
            surname = get_value_in_tag(author, "surname")
            if given_name:
                name = "%s, %s" % (surname, given_name)
            else:
                name = surname
            # Fall back to the unstructured name when the structured
            # parts produced nothing but whitespace.
            if not name.split():
                name = get_value_in_tag(author, "string-name")
            authors.append(name)

        # When several DOI-typed <pub-id> tags exist, the last one wins.
        doi = ""
        for tag in reference.getElementsByTagName("pub-id"):
            if tag.getAttribute("pub-id-type") == "doi":
                doi = xml_to_text(tag)

        issue = get_value_in_tag(reference, "issue")
        page = get_value_in_tag(reference, "fpage")
        page_last = get_value_in_tag(reference, "lpage")
        title = get_value_in_tag(reference, "source")
        volume = get_value_in_tag(reference, "volume")
        year = get_value_in_tag(reference, "year")
        ext_link = format_arxiv_id(
            super(NLMParser, self).get_ref_link(reference, "arxiv"))

        if ref_type != 'journal':
            plain_text = get_value_in_tag(reference, "mixed-citation")

        references.append((label, authors, doi, issue, page, page_last,
                           title, volume, year, ext_link, plain_text))
    self.references = references
def get_references(self, xml):
    """Build the list of references found under ``xml``.

    Every ``ce:bib-reference`` element contributes one 10-tuple::

        (label, authors, doi, issue, page, title, volume, year,
         textref, ext_link)

    ``authors`` is a list of names formatted as ``"surname, given-name"``
    (or only the surname when there is no given name). ``year`` is the
    first four characters of the ``sb:date`` value found in the first
    ``sb:issue`` element, or ``None`` when the reference has no
    ``sb:issue`` element.

    :param xml: DOM node exposing ``getElementsByTagName``.
    :return: list of reference tuples, in document order.
    """

    def _format_author(author_node):
        # "surname, given-name" when a given name exists, else the surname.
        first = get_value_in_tag(author_node, "ce:given-name")
        last = get_value_in_tag(author_node, "ce:surname")
        return "%s, %s" % (last, first) if first else last

    collected = []
    for ref_node in xml.getElementsByTagName("ce:bib-reference"):
        ref_label = get_value_in_tag(ref_node, "ce:label")
        author_names = [
            _format_author(author_node)
            for author_node in ref_node.getElementsByTagName("sb:author")
        ]
        ref_doi = get_value_in_tag(ref_node, "ce:doi")
        ref_issue = get_value_in_tag(ref_node, "sb:issue")
        first_page = get_value_in_tag(ref_node, "sb:first-page")
        main_title = get_value_in_tag(ref_node, "sb:maintitle")
        volume_nr = get_value_in_tag(ref_node, "sb:volume-nr")

        # The publication date is read from the first issue element.
        issue_nodes = ref_node.getElementsByTagName('sb:issue')
        pub_year = (
            get_value_in_tag(issue_nodes[0], "sb:date")[:4]
            if issue_nodes
            else None
        )

        text_ref = get_value_in_tag(ref_node, "ce:textref")
        arxiv_link = format_arxiv_id(self.get_ref_link(ref_node, 'arxiv'))

        entry = (
            ref_label,
            author_names,
            ref_doi,
            ref_issue,
            first_page,
            main_title,
            volume_nr,
            pub_year,
            text_ref,
            arxiv_link,
        )
        collected.append(entry)
    return collected