def get_references(self, xml_doc):
     """Generate one tuple per reference found in ``xml_doc``.

     Every ``ce:bib-reference`` element of the document is visited.

     When ``self.CONSYN`` is truthy the work is delegated to
     ``self._get_ref``: once per nested ``sb:reference`` element, or once
     for the ``ce:bib-reference`` element itself when it has no nested
     ``sb:reference``.

     Otherwise a 10-item tuple is yielded directly:
     ``(label, authors, doi, issue, page, title, volume, year, textref,
     ext_link)``.

     :param xml_doc: DOM node that supports ``getElementsByTagName``.
     """
     for bib_entry in xml_doc.getElementsByTagName("ce:bib-reference"):
         label = get_value_in_tag(bib_entry, "ce:label")

         if self.CONSYN:
             nested = bib_entry.getElementsByTagName("sb:reference")
             # Without nested references the entry itself is parsed.
             for node in (nested or [bib_entry]):
                 yield self._get_ref(node, label)
             continue

         # Authors are rendered as "surname, given-name", or as the bare
         # surname when no given name is available.
         authors = []
         for author_node in bib_entry.getElementsByTagName("sb:author"):
             given_name = get_value_in_tag(author_node, "ce:given-name")
             surname = get_value_in_tag(author_node, "ce:surname")
             authors.append(
                 "%s, %s" % (surname, given_name) if given_name else surname)

         doi = get_value_in_tag(bib_entry, "ce:doi")
         issue = get_value_in_tag(bib_entry, "sb:issue")
         page = get_value_in_tag(bib_entry, "sb:first-page")
         title = get_value_in_tag(bib_entry, "sb:maintitle")
         volume = get_value_in_tag(bib_entry, "sb:volume-nr")

         # The year is the first four characters of the date found inside
         # the first sb:issue element, or '' when there is no sb:issue.
         issue_nodes = bib_entry.getElementsByTagName('sb:issue')
         year = (get_value_in_tag(issue_nodes[0], "sb:date")[:4]
                 if issue_nodes else '')

         # When there is no ce:textref the (empty) node list itself is
         # passed on, exactly as found.
         textref_nodes = bib_entry.getElementsByTagName("ce:textref")
         textref = (xml_to_text(textref_nodes[0])
                    if textref_nodes else textref_nodes)

         ext_link = format_arxiv_id(self.get_ref_link(bib_entry, 'arxiv'))

         yield (label, authors, doi, issue, page, title, volume,
                year, textref, ext_link)
 def _get_ref(self, ref, label):
     """Extract the fields of a single reference element.

     :param ref: DOM element of the reference; it is queried with
         ``getElementsByTagName`` for its ``sb:*`` and ``ce:*`` children.
     :param label: label of the reference; returned unchanged as the
         first item of the result.
     :return: the 16-item tuple ``(label, authors, doi, issue, page,
         title, volume, year, textref, ext_link, isjournal, comment,
         journal, publisher, editors, book_title)``.

         * ``authors`` holds "surname, given-name" strings (bare surname
           when there is no given name).
         * ``year`` has every non-digit character removed.
         * ``textref`` is the text of the first ``ce:textref`` element,
           or the empty node list when there is none.
         * ``isjournal`` is the list of ``sb:issue`` elements (truthy
           when at least one exists), or ``True`` for an edited book
           that belongs to a book series.
     """
     doi = get_value_in_tag(ref, "ce:doi")
     page = get_value_in_tag(ref, "sb:first-page")
     issue = get_value_in_tag(ref, "sb:issue")
     volume = get_value_in_tag(ref, "sb:volume-nr")
     comment = get_value_in_tag(ref, "sb:comment")

     # Looked up once; reused for the year, the journal title and the
     # isjournal flag.
     issue_nodes = ref.getElementsByTagName("sb:issue")
     # Looked up once; reused by the edited-book and the book handling.
     series_nodes = ref.getElementsByTagName("sb:book-series")

     if issue_nodes:
         year = get_value_in_tag(issue_nodes[0], "sb:date")
     else:
         year = ''

     textref = ref.getElementsByTagName("ce:textref")
     if textref:
         textref = xml_to_text(textref[0])

     ext_link = format_arxiv_id(self.get_ref_link(ref, 'arxiv'))
     if ext_link and ext_link.lower().startswith('arxiv'):
         # The 6-character prefix (e.g. "arXiv:") is kept only when the
         # identifier contains digits separated by a dot. At least one
         # digit is required on each side: the former pattern
         # r'\d*\.\d*' matched any bare "." and so never stripped the
         # prefix from identifiers such as "arXiv:math.GT/0309136".
         if not re.search(r'\d+\.\d+', ext_link):
             ext_link = ext_link[6:]

     authors = []
     for author in ref.getElementsByTagName("sb:author"):
         given_name = get_value_in_tag(author, "ce:given-name")
         surname = get_value_in_tag(author, "ce:surname")
         if given_name:
             name = "%s, %s" % (surname, given_name)
         else:
             name = surname
         authors.append(name)

     # The title is the main title of the first sb:contribution; a
     # missing element (IndexError) or a TypeError raised while
     # converting it leaves the title empty.
     try:
         contribution = ref.getElementsByTagName("sb:contribution")[0]
         title = xml_to_text(
             contribution.getElementsByTagName("sb:maintitle")[0])
     except (IndexError, TypeError):
         title = ''

     isjournal = issue_nodes
     journal = ""
     if isjournal:
         if not page:
             # no first page available: use the comment in its place
             page = comment
         journal = get_value_in_tag(issue_nodes[0], "sb:maintitle")

     editors = []
     book_title = ""
     publisher = ""
     edited_books = ref.getElementsByTagName("sb:edited-book")
     if edited_books:
         edited_book = edited_books[0]
         if series_nodes:
             # treat as a journal
             journal = get_value_in_tag(series_nodes[0], "sb:maintitle")
             year = get_value_in_tag(ref, "sb:date")
             isjournal = True
         elif ref.getElementsByTagName("sb:conference"):
             # conference
             maintitle = get_value_in_tag(edited_book, "sb:maintitle")
             conference = get_value_in_tag(edited_book, "sb:conference")
             date = get_value_in_tag(edited_book, "sb:date")
             # use this variable in order to get in the 'm' field
             publisher = ", ".join((maintitle, conference, date))
         else:
             if ref.getElementsByTagName("sb:editors"):
                 for editor in ref.getElementsByTagName("sb:editor"):
                     surname = get_value_in_tag(editor, "ce:surname")
                     firstname = get_value_in_tag(editor, "ce:given-name")
                     editors.append("%s,%s" % (surname, firstname))
             # The book's main title becomes the book title when the
             # contribution already supplied a title, and the title
             # itself otherwise.
             if title:
                 book_title = get_value_in_tag(edited_book, "sb:maintitle")
             else:
                 title = get_value_in_tag(edited_book, "sb:maintitle")
             year = get_value_in_tag(edited_book, "sb:date")
             publisher_nodes = ref.getElementsByTagName("sb:publisher")
             if publisher_nodes:
                 location = get_value_in_tag(
                     publisher_nodes[0], "sb:location")
                 publisher = get_value_in_tag(publisher_nodes[0], "sb:name")
                 if location:
                     publisher = location + ": " + publisher

     if ref.getElementsByTagName("sb:book"):
         if series_nodes:
             # append the series title and volume number to the title
             title += ", " + \
                 get_value_in_tag(series_nodes[0], "sb:maintitle")
             title += ", " + \
                 get_value_in_tag(series_nodes[0], "sb:volume-nr")
         publisher = get_value_in_tag(ref, "sb:publisher")

     if not year:
         # fall back to the first sb:date found anywhere in the reference
         year = get_value_in_tag(ref, "sb:date")
     # keep only the digits of the date
     year = re.sub(r'\D', '', year)

     return (label, authors, doi, issue, page, title, volume,
            year, textref, ext_link, isjournal, comment, journal,
            publisher, editors, book_title)