Example #1
0
  def to_xml(self,p='',indent=2):
    """
    Convert the item to an XML fragment and return it as a string.

    p      -- prefix put in front of every element name that is written
              (entry, the entry type, authors/author and each field).
    indent -- number of spaces in one indentation unit.

    The 'author' field is expanded into one <author> element per person
    (taken from get_authorsList()) wrapped in <authors>.  Every other field
    becomes a single element named after its key.
    """
    sp = indent
    spc = indent*' '
    s = '%s<%sentry id="%s">\n' %(sp*spc,p,self.get_field('_code',''))
    sp += 1
    s += '%s<%s%s>\n' %(sp*spc,p,self.get('_type',''))

    for k,e in self.iteritems():
      if k == 'author':
        # One <author> element per person, nested one level deeper.
        sp += 1
        space = sp*spc+'\n'
        v = space.join(['%s<%sauthor>%s</%sauthor>'%(sp*spc,p,x,p) for x in self.get_authorsList()])
        v = helper.removebraces(v)
        v = helper.replace_tags(v,'other')
        sp -= 1
        s += '%s<%s%s>\n%s\n%s</%s%s>\n' %(sp*spc,p,'authors',v,sp*spc,p,'authors')
      else:
        if helper.is_string_like(e):
          v = helper.replace_tags(e,'xml')
          v = helper.handle_math(v)
        else:
          # Non-string values are written as they are.  Without this branch
          # `v` would be unbound (NameError) on the first such field, or
          # would still hold the text of the previously processed field.
          v = e
        if k=='title':
          v = helper.capitalizestring(v)
          v = helper.removebraces(v)
          v = helper.replace_tags(v,'other')
        s += '%s<%s%s>%s</%s%s>\n' %(sp*spc,p,k,v,p,k)

    sp -= 1
    s += '%s</%s%s>\n' %(sp*spc,p,self.get('_type',''))
    s += '%s</%sentry>\n' %(sp*spc,p)
    return s
Example #2
0
def extract_entries(text):
  """Parse BibTeX source text and return the resulting entry database.

  The text is split into lines and handed to bibtexparse.bibtexload().  For
  every entry the abbreviations from def_strings and then from the strings
  found in the text are substituted, and each field value is replaced by
  the result of replace_tags().
  """
  lines = text.splitlines()
  strings, bibdb = bibtexparse.bibtexload(lines)
  for _entry_id, entry in bibdb.iteritems():
    # Default abbreviations first, then the ones declared in the text itself.
    for abbreviations in (def_strings, strings):
      bibtexparse.replace_abbrev(entry, abbreviations)
    # Only existing keys are reassigned, so the dict size never changes
    # while it is being iterated.
    for field, raw in entry.iteritems():
      entry[field] = replace_tags(raw)
  return bibdb
Example #3
0
def extract_entries(text):
    """Parse BibTeX source text and return the resulting entry database.

    The text is split into lines and handed to bibtexparse.bibtexload().  For
    every entry the abbreviations from def_strings and then from the strings
    found in the text are substituted, and each field value is replaced by
    the result of replace_tags().
    """
    lines = text.splitlines()
    strings, bibdb = bibtexparse.bibtexload(lines)
    for _entry_id, entry in bibdb.iteritems():
        # Default abbreviations first, then the ones declared in the text.
        for abbreviations in (def_strings, strings):
            bibtexparse.replace_abbrev(entry, abbreviations)
        # Only existing keys are reassigned, so the dict size never changes
        # while it is being iterated.
        for field, raw in entry.iteritems():
            entry[field] = replace_tags(raw)
    return bibdb
Example #4
0
def bibtex_to_xml(bibtexlist, xmlhead=None, xmlfoot=None):
    """Serialise a mapping of BibTeX entries into a BibTeXML document string.

    Args:
        bibtexlist: mapping of entry id -> entry dict.  Every entry dict must
            contain a 'type' key; its 'id' and 'type' keys are not written
            as fields.  The mapping and its entries are left unmodified.
        xmlhead: text placed before the entries.  When falsy, a default XML
            prolog opening a <bibxml:file> element is used.
        xmlfoot: text placed after the entries.  When falsy, the closing
            </bibxml:file> tag is used.

    Returns:
        The string xmlhead + serialised entries + xmlfoot.

    Raises:
        KeyError: if an entry has no 'type' key.
    """
    if not xmlhead:
        xmlhead = """<?xml version="1.0" encoding="iso-8859-1"?>
    <!DOCTYPE bibxml:file SYSTEM "bibtexml-strict.dtd" >
    <bibxml:file xmlns:bibxml="http://bibtexml.sf.net/">\n
    """
    if not xmlfoot:
        xmlfoot = "\n</bibxml:file>"

    spd = '  '  # one indentation unit

    def clean(raw):
        # Replacement pipeline applied to every value that is written out.
        v = replace_tags(raw, 'xml')
        v = handle_math(v)
        v = removebraces(v)
        v = replace_tags(v, 'accents')
        return replace_tags(v, 'other')

    def element(level, name, raw):
        # A single <bibxml:name>value</bibxml:name> line at the given depth.
        return (level * spd + '<bibxml:' + name + '>' + clean(raw) +
                '</bibxml:' + name + '>\n')

    parts = []
    for Id, bib in bibtexlist.items():
        tipo = bib['type']
        parts.append(spd + '<bibxml:entry id="' + Id + '">\n')
        parts.append(2 * spd + '<bibxml:' + tipo + '>\n')

        for k, e in bib.items():
            # Skip the bookkeeping keys instead of deleting them: the entry
            # dicts are shared with the caller and must stay intact.
            if k in ('id', 'type'):
                continue
            if k in ('author', 'keywords'):
                if k == 'author':
                    # Commas are dropped; people are separated by ' and '.
                    values = e.replace(',', '').split(' and ')
                else:
                    values = e.split(',')
                parts.append(3 * spd + '<bibxml:' + k + 's>\n')
                parts.extend([element(4, k, val) for val in values])
                parts.append(3 * spd + '</bibxml:' + k + 's>\n')
            else:
                parts.append(element(3, k, e))

        parts.append(2 * spd + '</bibxml:' + tipo + '>\n')
        parts.append(spd + '</bibxml:entry>\n\n')
    return xmlhead + ''.join(parts) + xmlfoot
Example #5
0
def bibtex_to_xml(bibtexlist, xmlhead=None, xmlfoot=None):
    """Serialise a mapping of BibTeX entries into a BibTeXML document string.

    Args:
        bibtexlist: mapping of entry id -> entry dict.  Every entry dict must
            contain a 'type' key; its 'id' and 'type' keys are not written
            as fields.  The mapping and its entries are left unmodified.
        xmlhead: text placed before the entries.  When falsy, a default XML
            prolog opening a <bibxml:file> element is used.
        xmlfoot: text placed after the entries.  When falsy, the closing
            </bibxml:file> tag is used.

    Returns:
        The string xmlhead + serialised entries + xmlfoot.

    Raises:
        KeyError: if an entry has no 'type' key.
    """
    if not xmlhead:
        xmlhead = """<?xml version="1.0" encoding="iso-8859-1"?>
    <!DOCTYPE bibxml:file SYSTEM "bibtexml-strict.dtd" >
    <bibxml:file xmlns:bibxml="http://bibtexml.sf.net/">\n
    """
    if not xmlfoot:
        xmlfoot = "\n</bibxml:file>"

    spd = '  '  # one indentation unit

    def clean(raw):
        # Replacement pipeline applied to every value that is written out.
        v = replace_tags(raw, 'xml')
        v = handle_math(v)
        v = removebraces(v)
        v = replace_tags(v, 'accents')
        return replace_tags(v, 'other')

    def element(level, name, raw):
        # A single <bibxml:name>value</bibxml:name> line at the given depth.
        return (level * spd + '<bibxml:' + name + '>' + clean(raw) +
                '</bibxml:' + name + '>\n')

    parts = []
    for Id, bib in bibtexlist.items():
        tipo = bib['type']
        parts.append(spd + '<bibxml:entry id="' + Id + '">\n')
        parts.append(2 * spd + '<bibxml:' + tipo + '>\n')

        for k, e in bib.items():
            # Skip the bookkeeping keys instead of deleting them: the entry
            # dicts are shared with the caller and must stay intact.
            if k in ('id', 'type'):
                continue
            if k in ('author', 'keywords'):
                if k == 'author':
                    # Commas are dropped; people are separated by ' and '.
                    values = e.replace(',', '').split(' and ')
                else:
                    values = e.split(',')
                parts.append(3 * spd + '<bibxml:' + k + 's>\n')
                parts.extend([element(4, k, val) for val in values])
                parts.append(3 * spd + '</bibxml:' + k + 's>\n')
            else:
                parts.append(element(3, k, e))

        parts.append(2 * spd + '</bibxml:' + tipo + '>\n')
        parts.append(spd + '</bibxml:entry>\n\n')
    return xmlhead + ''.join(parts) + xmlfoot
Example #6
0
    def to_xml(self, p='', indent=2):
        """
        Convert the item to a <bibliographic_information> XML fragment.

        Only 'article', 'book', 'incollection' and 'inbook' entries are
        handled; for any other entry type None is returned.

        p      -- prefix put in front of the bibliographic_information,
                  entry-type, authors and editors element names.
        indent -- number of spaces in one indentation unit; the fragment
                  starts at a depth of indent * 3 units.

        Elements are written in a fixed order: authors, title, year,
        editors, journal, booktitle, series, publisher, volume, pages,
        issue, doi, url.  Which of them apply depends on the entry type.
        series, publisher, volume, pages, issue, doi and url are left out
        when the corresponding field is None.
        """
        from xml.sax.saxutils import escape
        from string import capwords

        entry_type = self.get('_type', '')
        # check it's one of our approved types!
        if entry_type not in ('article', 'book', 'incollection', 'inbook'):
            return None
        # Element names differ from the BibTeX type names for these two.
        entry_type = {'inbook': 'in_book',
                      'incollection': 'in_collection'}.get(entry_type,
                                                           entry_type)
        # After the check above everything that is not an article is one of
        # book / in_book / in_collection.
        book_like = entry_type != 'article'

        spc = indent * ' '
        base = indent * 3  # depth of the outermost element
        field = base + 1   # depth of the entry-type element and its fields

        def pad(level):
            # Leading whitespace for the given depth.
            return level * spc

        def xml_text(value):
            # Run string values through the 'xml' tag replacement.
            if helper.is_string_like(value):
                return helper.replace_tags(value, 'xml')
            return value

        def leaf(level, name, value, inner='string_value lines="1"'):
            # <name> wrapping one value element (string_value by default).
            inner_name = inner.split(' ', 1)[0]
            return '%s<%s>\n%s<%s>%s</%s>\n%s</%s>\n' % (
                pad(level), name, pad(level + 1), inner, value, inner_name,
                pad(level), name)

        def split_name(name):
            # "Other Names Surname" -> (surname, other names); a single word
            # is treated as a surname with "--" as placeholder.
            name = name.strip()
            try:
                other_names, surname = name.rsplit(' ', 1)
            except ValueError:  # only family name
                surname, other_names = name, '--'
            return surname, other_names

        def person(name, surname, other_names):
            # One <author>/<editor> element, nested below its wrapper.
            level = field + 1
            return ('%s<%s>\n' % (pad(level), name) +
                    leaf(level + 1, 'surname', surname) +
                    leaf(level + 1, 'other_names', other_names) +
                    '%s</%s>\n' % (pad(level), name))

        def group(name, body):
            # Wrapper element (<authors>/<editors>) around person elements.
            return '%s<%s%s>\n%s%s</%s%s>\n' % (pad(field), p, name, body,
                                                pad(field), p, name)

        def optional(name, field_name, convert=True):
            # Element for a field that is omitted entirely when None.
            value = self.get_field(field_name)
            if value is None:
                return ''
            if convert:
                value = xml_text(value)
            return leaf(field, name, value)

        # The prefix is applied to the opening tag as well, so that it
        # matches the prefixed closing tag written at the end.
        out = ['%s<%sbibliographic_information>\n' % (pad(base), p),
               '%s<%s%s>\n' % (pad(field), p, entry_type)]

        # Order matters: the target schema separates elements with ',' and
        # not '&' (see the field order listed in the docstring).
        v = ''
        for x in self.get_authorsList():
            surname, other_names = split_name(x)
            v += person('author', capwords(surname), capwords(other_names))
            v = helper.removebraces(v)
            v = helper.replace_tags(v, 'other')
        out.append(group('authors', v))

        # title and year are always written
        out.append(leaf(field, 'title', xml_text(self.get_field('title'))))
        out.append(leaf(field, 'year', self.get_field('year'),
                        'integer_value rank="0"'))

        # book, in book and incollection have editors here
        if book_like:
            v = ''
            for x in self.get_authorsList(who='editor'):
                try:
                    surname, other_names = split_name(x)
                except AttributeError:
                    # Not a string (no .strip()) -- skip this editor.
                    continue
                v += person('editor', surname, other_names)
                v = helper.removebraces(v)
                v = helper.replace_tags(v, 'other')
            out.append(group('editors', v))

        # journal - if article
        if entry_type == 'article':
            out.append(leaf(field, 'journal',
                            xml_text(self.get_field('journal'))))

        # booktitle - if incollection
        if entry_type == 'in_collection':
            out.append(leaf(field, 'booktitle',
                            xml_text(self.get_field('booktitle'))))

        # Everything below is optional: a field missing from the bib file
        # produces no element at all rather than an empty one.

        # series, publisher - everything but article
        if book_like:
            out.append(optional('series', 'series'))
            out.append(optional('publisher', 'publisher'))

        # volume - article only
        if entry_type == 'article':
            out.append(optional('volume', 'volume'))

        # pages - unless in a book
        if entry_type != 'book':
            lastpage = self.get_field('lastpage')
            firstpage = self.get_field('firstpage')
            if lastpage is not None and firstpage is not None:
                if lastpage == '':
                    pages = firstpage
                else:
                    pages = firstpage + '-' + lastpage
                out.append(leaf(field, 'pages', escape(pages)))

        # number - article only, written as <issue> without tag replacement
        if entry_type == 'article':
            out.append(optional('issue', 'number', convert=False))

        # doi and url - everyone
        out.append(optional('doi', 'doi'))
        out.append(optional('url', 'url'))

        # Both closing tags are written at the outermost depth, as before.
        out.append('%s</%s%s>\n' % (pad(base), p, entry_type))
        out.append('%s</%sbibliographic_information>\n' % (pad(base), p))
        return ''.join(out)
Example #7
0
    def __format_value(self, bibkey, value, style):
        """Push the formatted form of one BibTeX field onto self.__stack.

        Nothing happens when bibkey is not present in value.  Otherwise the
        following are pushed in order: the style's "pre" item, the output of
        every "presub" field, a span (class = bibkey) holding the formatted
        value, the output of every "postsub" field, and the style's "post"
        item.

        bibkey -- name of the field to format.
        value  -- mapping of field name -> raw field text; not modified.
        style  -- mapping with "required" and "optional" sub-mappings that
                  hold the per-field settings ("pre", "post" and optionally
                  "presub" / "postsub").

        Raises KeyError when style lacks "required"/"optional", or when
        bibkey is in value but in neither of those sub-mappings.
        """
        required = style["required"]
        optional = style["optional"]
        if bibkey in value:
            entry = required[bibkey] if bibkey in required else optional[bibkey]
            pre = entry["pre"]
            post = entry["post"]

            span = tag.span(class_=bibkey)
            self.__stack.append(pre)
            if "presub" in entry:
                for sub in entry["presub"]:
                    # The recursive call pushes its own output and returns
                    # None, so its return value must not be pushed as well.
                    self.__format_value(sub, value, style)

            if bibkey in BIBTEX_PERSON:
                a = authors(value[bibkey])
                count = len(a)
                for index, person in enumerate(a):
                    if "first" in person:
                        # First names are reduced to initials ("J.R.").
                        formatted = ""
                        for first in person["first"].split(" "):
                            first = remove_braces(replace_tags(first))
                            if len(first) > 0:
                                formatted = formatted + first[0] + "."
                        partspan = tag.span(class_="first")
                        partspan.append(formatted)
                        span.append(partspan)
                        span.append(" ")

                    for part in ["von", "last"]:
                        if part in person:
                            partspan = tag.span(class_=part)
                            partspan.append(remove_braces(replace_tags(person[part])))
                            span.append(partspan)
                            if part != "last":
                                span.append(" ")
                    # Position, not equality, decides whether this is the
                    # last person: two identical names must both be listed.
                    if index < count - 1 and count < 3:
                        span.append(" and ")
                    else:
                        # Three or more people: only the first one is shown,
                        # followed by "et al.".
                        if count >= 3:
                            etal = tag.span(class_="etal")
                            etal.append(" et al.")
                            span.append(etal)
                        if bibkey == "editor":
                            # Plural whenever there is more than one editor,
                            # including the abbreviated "et al." form.
                            if count > 1:
                                span.append(", Eds.")
                            else:
                                span.append(", Ed.")
                        break

            elif bibkey == "url":
                url = value["url"]
                span.append(tag.a(href=url)(unicode_unquote(url)))
            elif bibkey == "doi":
                url = "http://dx.doi.org/" + value["doi"].strip()
                span.append(tag.a(href=url)(value["doi"]))
            else:
                text = value[bibkey]
                if bibkey == "pages":
                    # Normalise page ranges to a double dash on a local copy
                    # so that the caller's mapping is left untouched.
                    text = re.sub("---", "--", text)
                    text = re.sub(r"([^-])-([^-])", r"\1--\2", text)
                span.append(Markup(capitalizetitle(replace_tags(text))))
            self.__stack.append(span)
            if "postsub" in entry:
                for sub in entry["postsub"]:
                    self.__format_value(sub, value, style)
            self.__stack.append(post)
Example #8
0
    def __format_value(self, bibkey, value, style):
        """Push the formatted form of one BibTeX field onto self.__stack.

        Nothing happens when bibkey is not present in value.  Otherwise the
        following are pushed in order: the style's 'pre' item, the output of
        every 'presub' field, a span (class = bibkey) holding the formatted
        value, the output of every 'postsub' field, and the style's 'post'
        item.

        bibkey -- name of the field to format.
        value  -- mapping of field name -> raw field text; not modified.
        style  -- mapping with 'required' and 'optional' sub-mappings that
                  hold the per-field settings ('pre', 'post' and optionally
                  'presub' / 'postsub').

        Raises KeyError when style lacks 'required'/'optional', or when
        bibkey is in value but in neither of those sub-mappings.
        """
        required = style['required']
        optional = style['optional']
        if bibkey in value:
            entry = required[bibkey] if bibkey in required else optional[bibkey]
            pre = entry['pre']
            post = entry['post']

            span = tag.span(class_=bibkey)
            self.__stack.append(pre)
            if 'presub' in entry:
                for sub in entry['presub']:
                    # The recursive call pushes its own output and returns
                    # None, so its return value must not be pushed as well.
                    self.__format_value(sub, value, style)

            if bibkey in BIBTEX_PERSON:
                a = authors(value[bibkey])
                count = len(a)
                for index, person in enumerate(a):
                    if 'first' in person:
                        # First names are reduced to initials ("J.R.").
                        formatted = ""
                        for first in person['first'].split(' '):
                            first = remove_braces(replace_tags(first))
                            if len(first) > 0:
                                formatted = formatted + first[0] + "."
                        partspan = tag.span(class_='first')
                        partspan.append(formatted)
                        span.append(partspan)
                        span.append(" ")

                    for part in ['von', 'last']:
                        if part in person:
                            partspan = tag.span(class_=part)
                            partspan.append(
                                remove_braces(replace_tags(person[part])))
                            span.append(partspan)
                            if part != 'last':
                                span.append(" ")
                    # Position, not equality, decides whether this is the
                    # last person: two identical names must both be listed.
                    if index < count - 1 and count < 3:
                        span.append(" and ")
                    else:
                        # Three or more people: only the first one is shown,
                        # followed by "et al.".
                        if count >= 3:
                            etal = tag.span(class_='etal')
                            etal.append(" et al.")
                            span.append(etal)
                        if bibkey == 'editor':
                            # Plural whenever there is more than one editor,
                            # including the abbreviated "et al." form.
                            if count > 1:
                                span.append(", Eds.")
                            else:
                                span.append(", Ed.")
                        break

            elif bibkey == 'url':
                url = value['url']
                span.append(tag.a(href=url)(unicode_unquote(url)))
            elif bibkey == 'doi':
                url = 'http://dx.doi.org/' + value['doi'].strip()
                span.append(tag.a(href=url)(value['doi']))
            else:
                text = value[bibkey]
                if bibkey == 'pages':
                    # Normalise page ranges to a double dash on a local copy
                    # so that the caller's mapping is left untouched.
                    text = re.sub('---', '--', text)
                    text = re.sub(r'([^-])-([^-])', r'\1--\2', text)
                span.append(
                    Markup(capitalizetitle(replace_tags(text))))
            self.__stack.append(span)
            if 'postsub' in entry:
                for sub in entry['postsub']:
                    self.__format_value(sub, value, style)
            self.__stack.append(post)