def to_xml(self, p='', indent=2):
    """
    Render this item as an XML ``entry`` element and return it as a string.

    p      -- prefix written in front of every element name
    indent -- width of one indentation level in spaces; the entry itself is
              written ``indent`` levels deep

    The ``author`` field becomes an ``authors`` element holding one
    ``author`` child per name from ``get_authorsList()``.  Every other
    field is written only when its value is string-like.
    """
    unit = indent * ' '
    outer_pad = indent * unit
    inner_pad = (indent + 1) * unit
    author_pad = (indent + 2) * unit

    chunks = ['%s<%sentry id="%s">\n'
              % (outer_pad, p, self.get_field('_code', ''))]
    chunks.append('%s<%s%s>\n' % (inner_pad, p, self.get('_type', '')))

    for name, raw in self.iteritems():
        if name == 'author':
            rows = ['%s<%sauthor>%s</%sauthor>' % (author_pad, p, who, p)
                    for who in self.get_authorsList()]
            # The separator carries the padding *before* the line break,
            # exactly as the output has always been produced.
            text = (author_pad + '\n').join(rows)
            text = helper.removebraces(text)
            text = helper.replace_tags(text, 'other')
            chunks.append('%s<%s%s>\n%s\n%s</%s%s>\n'
                          % (inner_pad, p, 'authors', text,
                             inner_pad, p, 'authors'))
            continue

        if not helper.is_string_like(raw):
            # Non-text values have no XML representation here.
            continue

        text = helper.replace_tags(raw, 'xml')
        text = helper.handle_math(text)
        if name == 'title':
            text = helper.capitalizestring(text)
        text = helper.removebraces(text)
        text = helper.replace_tags(text, 'other')
        chunks.append('%s<%s%s>%s</%s%s>\n'
                      % (inner_pad, p, name, text, p, name))

    # Both closing tags are written at the entry's own depth.
    chunks.append('%s</%s%s>\n' % (outer_pad, p, self.get('_type', '')))
    chunks.append('%s</%sentry>\n' % (outer_pad, p))
    return ''.join(chunks)
def extract_entries(text):
    """Parse BibTeX source text and return the resulting entry database.

    The text is split into lines and handed to ``bibtexparse.bibtexload``.
    For every entry, abbreviations are expanded first from ``def_strings``
    and then from the strings returned by the loader; afterwards every
    field value is passed through ``replace_tags``.  The entries are
    updated in place and the database is returned.
    """
    source_lines = text.splitlines()
    strings, bibdb = bibtexparse.bibtexload(source_lines)

    for _entry_id, record in bibdb.iteritems():
        # Built-in abbreviations first, then the ones from this source.
        for abbreviations in (def_strings, strings):
            bibtexparse.replace_abbrev(record, abbreviations)
        # Only existing keys are reassigned, so the dict never changes size
        # while it is being iterated.
        for field_name, raw_value in record.iteritems():
            record[field_name] = replace_tags(raw_value)

    return bibdb
def extract_entries(text):
    """Load BibTeX entries from ``text`` and normalise their field values.

    ``bibtexparse.bibtexload`` receives the text line by line and returns
    the string definitions together with the entry database.  Each entry
    then has its abbreviations replaced (``def_strings`` before the loaded
    strings) and each of its values rewritten by ``replace_tags``.

    Returns the database, whose entries have been modified in place.
    """
    loaded = bibtexparse.bibtexload(text.splitlines())
    strings, bibdb = loaded

    for _unused_id, entry in bibdb.iteritems():
        bibtexparse.replace_abbrev(entry, def_strings)
        bibtexparse.replace_abbrev(entry, strings)
        # Reassigning existing keys keeps the dict size stable, which is
        # what makes updating during iteration safe.
        for field, original in entry.iteritems():
            converted = replace_tags(original)
            entry[field] = converted

    return bibdb
def bibtex_to_xml(bibtexlist, xmlhead=None, xmlfoot=None):
    """Render a parsed BibTeX database as a BibTeXML document string.

    bibtexlist -- mapping of entry id -> entry dict.  Every entry must
                  contain a 'type' key; all other keys except 'id' are
                  written as fields.
    xmlhead    -- text placed before the entries; a default prolog is used
                  when this is missing or empty.
    xmlfoot    -- text placed after the entries; the default closes the
                  ``bibxml:file`` element.

    'author' values have their commas removed and are split on ' and ';
    'keywords' values are split on ','.  Both are wrapped in a plural
    element ('authors' / 'keywordss', as the output has always been named).

    The entries are read only.  Earlier versions deleted 'id' and 'type'
    from each entry, which modified the caller's data (the copy taken was
    shallow) and made a second call fail with KeyError.

    Raises KeyError when an entry has no 'type'.
    """
    if not xmlhead:
        xmlhead = """<?xml version="1.0" encoding="iso-8859-1"?> <!DOCTYPE bibxml:file SYSTEM "bibtexml-strict.dtd" > <bibxml:file xmlns:bibxml="http://bibtexml.sf.net/">\n """
    if not xmlfoot:
        xmlfoot = "\n</bibxml:file>"

    def clean(raw):
        # The conversion order matters: XML escaping first, accents and
        # remaining tags only after math and braces have been handled.
        text = replace_tags(raw, 'xml')
        text = handle_math(text)
        text = removebraces(text)
        text = replace_tags(text, 'accents')
        return replace_tags(text, 'other')

    spd = ' '
    entry_pad, type_pad, field_pad, item_pad = [n * spd for n in (1, 2, 3, 4)]

    chunks = []
    # .items() behaves the same on Python 2 and 3 for plain iteration.
    for Id, bib in bibtexlist.items():
        tipo = bib['type']
        chunks.append(entry_pad + '<bibxml:entry id="' + Id + '">\n')
        chunks.append(type_pad + '<bibxml:' + tipo + '>\n')
        for k, e in bib.items():
            if k in ('id', 'type'):
                # Already represented by the entry and type elements.
                continue
            if k == 'author' or k == 'keywords':
                if k == 'author':
                    parts = e.replace(',', '').split(' and ')
                else:
                    parts = e.split(',')
                chunks.append(field_pad + '<bibxml:' + k + 's>\n')
                for val in parts:
                    chunks.append(item_pad + '<bibxml:' + k + '>'
                                  + clean(val) + '</bibxml:' + k + '>\n')
                chunks.append(field_pad + '</bibxml:' + k + 's>\n')
            else:
                chunks.append(field_pad + '<bibxml:' + k + '>'
                              + clean(e) + '</bibxml:' + k + '>\n')
        chunks.append(type_pad + '</bibxml:' + tipo + '>\n')
        chunks.append(entry_pad + '</bibxml:entry>\n\n')

    return xmlhead + ''.join(chunks) + xmlfoot
def bibtex_to_xml(bibtexlist, xmlhead=None, xmlfoot=None):
    """Convert a BibTeX database into a BibTeXML document and return it.

    bibtexlist -- mapping of entry id -> dict of fields.  Each entry needs
                  a 'type' key; 'id' and 'type' are not written as fields.
    xmlhead    -- document prolog; replaced by a default when falsy.
    xmlfoot    -- document epilogue; replaced by a default when falsy.

    Multi-valued fields: 'author' is stripped of commas and split on
    ' and ', 'keywords' is split on ','.  Each value gets its own element
    inside a wrapper named after the field plus 's'.

    The input is never modified.  Previously 'id' and 'type' were deleted
    from every entry; because only the outer mapping had been copied, that
    changed the caller's entries and broke repeated conversion.

    Raises KeyError when an entry has no 'type'.
    """
    if not xmlhead:
        xmlhead = """<?xml version="1.0" encoding="iso-8859-1"?> <!DOCTYPE bibxml:file SYSTEM "bibtexml-strict.dtd" > <bibxml:file xmlns:bibxml="http://bibtexml.sf.net/">\n """
    if not xmlfoot:
        xmlfoot = "\n</bibxml:file>"

    spd = ' '
    # How each multi-valued field is broken into individual values.
    splitters = {
        'author': lambda raw: raw.replace(',', '').split(' and '),
        'keywords': lambda raw: raw.split(','),
    }

    def leaf(depth, name, raw):
        # One '<bibxml:name>value</bibxml:name>' line at the given depth.
        v = replace_tags(raw, 'xml')
        v = handle_math(v)
        v = removebraces(v)
        v = replace_tags(v, 'accents')
        v = replace_tags(v, 'other')
        return (depth * spd + '<bibxml:' + name + '>' + v
                + '</bibxml:' + name + '>\n')

    out = []
    # .items() is available on both Python 2 and Python 3 mappings.
    for Id, bib in bibtexlist.items():
        tipo = bib['type']
        out.append(1 * spd + '<bibxml:entry id="' + Id + '">\n')
        out.append(2 * spd + '<bibxml:' + tipo + '>\n')
        for k, e in bib.items():
            if k == 'id' or k == 'type':
                # Skipped instead of deleted so the entry stays intact.
                continue
            if k in splitters:
                values = splitters[k](e)
                out.append(3 * spd + '<bibxml:' + k + 's>\n')
                out.extend(leaf(4, k, val) for val in values)
                out.append(3 * spd + '</bibxml:' + k + 's>\n')
            else:
                out.append(leaf(3, k, e))
        out.append(2 * spd + '</bibxml:' + tipo + '>\n')
        out.append(1 * spd + '</bibxml:entry>\n\n')

    return xmlhead + ''.join(out) + xmlfoot
def to_xml(self, p='', indent=2):
    """
    Converts the item to a ``bibliographic_information`` XML fragment.

    p      -- prefix written in front of the ``bibliographic_information``,
              entry-type, ``authors`` and ``editors`` element names
    indent -- width of one indentation level in spaces

    Returns the XML as a string, or None when the item's ``_type`` is not
    one of article, book, incollection or inbook.

    Elements are written in a fixed order because the target schema
    separates its elements with ',' rather than '&', which makes the order
    significant: authors, title, year, editors, journal, booktitle, series,
    publisher, volume, pages, issue, doi, url.  Which of these apply
    depends on the entry type.  From series onwards a field that is absent
    from the item is left out rather than written as an empty element.
    """
    from xml.sax.saxutils import escape
    from string import capwords

    entry_type = self.get('_type', '')
    # check it's one of our approved types!
    if entry_type not in ('article', 'book', 'incollection', 'inbook'):
        return None
    entry_type = {'inbook': 'in_book',
                  'incollection': 'in_collection'}.get(entry_type, entry_type)
    is_article = entry_type == 'article'
    has_book_fields = entry_type in ('book', 'in_book', 'in_collection')

    spc = indent * ' '
    string_fmt = '<string_value lines="1">%s</string_value>'

    def xml_text(value):
        # Only string-like values are run through the tag replacement.
        if helper.is_string_like(value):
            return helper.replace_tags(value, 'xml')
        return value

    def element(name, value, depth, value_fmt=string_fmt):
        # <name> on its own line, the value element one level deeper,
        # then </name> back at the original depth.
        pad = depth * spc
        return '%s<%s>\n%s%s\n%s</%s>\n' % (
            pad, name, (depth + 1) * spc, value_fmt % (value,), pad, name)

    def person(name, surname, other_names, depth):
        pad = depth * spc
        return ('%s<%s>\n' % (pad, name)
                + element('surname', surname, depth + 1)
                + element('other_names', other_names, depth + 1)
                + '%s</%s>\n' % (pad, name))

    def group(name, body, depth):
        body = helper.removebraces(body)
        body = helper.replace_tags(body, 'other')
        pad = depth * spc
        return '%s<%s%s>\n%s%s</%s%s>\n' % (pad, p, name, body, pad, p, name)

    sp = indent * 3
    # The opening tag now carries the prefix like its closing tag does;
    # before, a non-empty prefix produced mismatched tags.
    s = '%s<%sbibliographic_information>\n' % (sp * spc, p)
    sp += 1
    s += '%s<%s%s>\n' % (sp * spc, p, entry_type)

    # authors
    v = ""
    for x in self.get_authorsList():
        full_name = x.strip()
        try:
            other_names, fam_name = full_name.rsplit(' ', 1)
        except ValueError:
            # only family name
            other_names, fam_name = "--", full_name
        v += person('author', capwords(fam_name), capwords(other_names),
                    sp + 1)
    s += group('authors', v, sp)

    # title
    s += element('title', xml_text(self.get_field('title')), sp)

    # year
    s += element('year', self.get_field('year'), sp,
                 '<integer_value rank="0">%s</integer_value>')

    # book, in book and incollection have editors here
    if has_book_fields:
        v = ""
        for x in self.get_authorsList(who='editor'):
            try:
                other_names, fam_name = x.rsplit(' ', 1)
            except AttributeError:
                # not a string: nothing usable to write
                continue
            except ValueError:
                # Only a family name: handled like the authors above
                # instead of aborting the whole conversion.
                other_names, fam_name = "--", x.strip()
            v += person('editor', fam_name, other_names, sp)
        # NOTE: everything after the editors is written one level
        # shallower.  The depth is decreased here without a matching
        # increase, and that is kept so existing output stays unchanged.
        sp -= 1
        s += group('editors', v, sp)

    # journal - if article
    if is_article:
        s += element('journal', xml_text(self.get_field('journal')), sp)

    # booktitle - if incollection
    if entry_type == 'in_collection':
        s += element('booktitle', xml_text(self.get_field('booktitle')), sp)

    # All entries below are optional - if they aren't in the bib file,
    # don't add empty tags, just ignore completely.

    # series and publisher - everything but article
    if has_book_fields:
        for name in ('series', 'publisher'):
            k = xml_text(self.get_field(name))
            if k is not None:
                s += element(name, k, sp)

    # volume
    if is_article:
        volume = xml_text(self.get_field('volume'))
        if volume is not None:
            s += element('volume', volume, sp)

    # pages - unless in a book
    if entry_type != 'book':
        lastpage = self.get_field('lastpage')
        firstpage = self.get_field('firstpage')
        if lastpage is not None and firstpage is not None:
            if lastpage == '':
                pages = firstpage
            else:
                pages = firstpage + "-" + lastpage
            s += element('pages', escape(pages), sp)

    # number - article only (written as <issue>, value used as is)
    if is_article:
        k = self.get_field('number')
        if k is not None:
            s += element('issue', k, sp)

    # doi and url - everyone!
    for name in ('doi', 'url'):
        k = self.get_field(name)
        if k is not None:
            s += element(name, xml_text(k), sp)

    sp -= 1
    s += '%s</%s%s>\n' % (sp * spc, p, entry_type)
    s += '%s</%sbibliographic_information>\n' % (sp * spc, p)
    return s
def __format_value(self, bibkey, value, style):
    """Format one field of a BibTeX entry and push the result on the stack.

    bibkey -- name of the field to format
    value  -- the entry: a mapping of field name -> raw field text
    style  -- mapping with "required" and "optional" sub-mappings; each
              maps a field name to a dict with "pre" and "post" and,
              optionally, "presub" / "postsub" lists of further fields

    Nothing happens when the entry does not contain ``bibkey``.  Otherwise
    the following are appended to ``self.__stack`` in this order: the
    field's "pre" value, the output of its "presub" fields, a span holding
    the formatted field, the output of its "postsub" fields, and its "post"
    value.

    Person fields (those in ``BIBTEX_PERSON``) are written as initials
    plus 'von' and 'last' parts; with three or more people only the first
    is written, followed by ' et al.'.  'url' and 'doi' become links.  For
    'pages' the dash normalisation is also stored back into ``value``.

    Returns None; the output is delivered only through the stack.
    """
    required = style["required"]
    optional = style["optional"]
    if bibkey not in value:
        return

    entry = required[bibkey] if bibkey in required else optional[bibkey]
    pre = entry["pre"]
    post = entry["post"]

    span = tag.span(class_=bibkey)
    self.__stack.append(pre)
    if "presub" in entry:
        for sub in entry["presub"]:
            # The recursive call pushes its own output and returns None,
            # so its return value must not be pushed as well (this used to
            # leave a stray None on the stack for every sub-field).
            self.__format_value(sub, value, style)

    if bibkey in BIBTEX_PERSON:
        people = authors(value[bibkey])
        for person in people:
            if "first" in person:
                # First names are reduced to initials: "John Paul" -> "J.P."
                initials = ""
                for word in person["first"].split(" "):
                    word = remove_braces(replace_tags(word))
                    if len(word) > 0:
                        initials = initials + word[0] + "."
                first_span = tag.span(class_="first")
                first_span.append(initials)
                span.append(first_span)
                span.append(" ")
            for part in ("von", "last"):
                if part in person:
                    part_span = tag.span(class_=part)
                    part_span.append(remove_braces(replace_tags(person[part])))
                    span.append(part_span)
                    if part != "last":
                        span.append(" ")
            if person != people[-1] and len(people) < 3:
                span.append(" and ")
                continue
            # Reached for the last of one or two people, or for the first
            # of three or more; nobody else is listed after this point.
            if len(people) >= 3:
                etal = tag.span(class_="etal")
                etal.append(" et al.")
                span.append(etal)
            if bibkey == "editor":
                if len(people) > 1 and person == people[-1]:
                    span.append(", Eds.")
                else:
                    span.append(", Ed.")
            break
    elif bibkey == "url":
        url = value["url"]
        span.append(tag.a(href=url)(unicode_unquote(url)))
    elif bibkey == "doi":
        url = "http://dx.doi.org/" + value["doi"].strip()
        span.append(tag.a(href=url)(value["doi"]))
    else:
        if bibkey == "pages":
            # Collapse '---' to '--', then widen single dashes to '--'.
            pages = re.sub("---", "--", value[bibkey])
            value[bibkey] = re.sub(r"([^-])-([^-])", r"\1--\2", pages)
        span.append(Markup(capitalizetitle(replace_tags(value[bibkey]))))

    self.__stack.append(span)
    if "postsub" in entry:
        for sub in entry["postsub"]:
            self.__format_value(sub, value, style)
    self.__stack.append(post)
def __format_value(self, bibkey, value, style):
    """Push the formatted form of one entry field onto ``self.__stack``.

    bibkey -- field name to format
    value  -- mapping of field name -> raw text for the current entry
    style  -- mapping with 'required' and 'optional' tables; a field's
              table row provides 'pre' and 'post' and may list sub-fields
              under 'presub' and 'postsub'

    When ``bibkey`` is missing from ``value`` nothing is pushed.  Otherwise
    the stack receives, in order: 'pre', whatever the 'presub' fields push,
    the span for this field, whatever the 'postsub' fields push, and
    'post'.

    Fields in ``BIBTEX_PERSON`` are rendered as initials followed by the
    'von' and 'last' parts; lists of three or more are cut to the first
    person plus ' et al.'.  'url' and 'doi' are rendered as links.  The
    'pages' text is dash-normalised and the normalised text is written back
    into ``value``.

    Returns None.
    """
    required = style['required']
    optional = style['optional']
    if bibkey not in value:
        return

    if bibkey in required:
        entry = required[bibkey]
    else:
        entry = optional[bibkey]
    pre, post = entry['pre'], entry['post']

    def initials_of(first_names):
        # "John Paul" -> "J.P."; empty words (double spaces) are ignored.
        letters = []
        for word in first_names.split(' '):
            word = remove_braces(replace_tags(word))
            if len(word) > 0:
                letters.append(word[0] + ".")
        return "".join(letters)

    span = tag.span(class_=bibkey)
    self.__stack.append(pre)
    if 'presub' in entry:
        for sub in entry['presub']:
            # Sub-fields push their own output; the call returns None, and
            # pushing that result too only added a stray None to the stack.
            self.__format_value(sub, value, style)

    if bibkey in BIBTEX_PERSON:
        a = authors(value[bibkey])
        for person in a:
            if 'first' in person:
                partspan = tag.span(class_='first')
                partspan.append(initials_of(person['first']))
                span.append(partspan)
                span.append(" ")
            for part in ['von', 'last']:
                if part not in person:
                    continue
                partspan = tag.span(class_=part)
                partspan.append(remove_braces(replace_tags(person[part])))
                span.append(partspan)
                if part != 'last':
                    span.append(" ")
            if person != a[-1] and len(a) < 3:
                span.append(" and ")
            else:
                # Last of a short list, or first of a long one: finish the
                # list here and stop.
                if len(a) >= 3:
                    etal = tag.span(class_='etal')
                    etal.append(" et al.")
                    span.append(etal)
                if bibkey == 'editor':
                    if len(a) > 1 and person == a[-1]:
                        span.append(", Eds.")
                    else:
                        span.append(", Ed.")
                break
    elif bibkey == 'url':
        url = value['url']
        span.append(tag.a(href=url)(unicode_unquote(url)))
    elif bibkey == 'doi':
        url = 'http://dx.doi.org/' + value['doi'].strip()
        span.append(tag.a(href=url)(value['doi']))
    else:
        if bibkey == 'pages':
            # '---' becomes '--' first so that the single-dash rule below
            # cannot see it; a lone '-' between other characters is widened.
            value[bibkey] = re.sub('---', '--', value[bibkey])
            value[bibkey] = re.sub(r'([^-])-([^-])', r'\1--\2',
                                   value[bibkey])
        span.append(Markup(capitalizetitle(replace_tags(value[bibkey]))))

    self.__stack.append(span)
    if 'postsub' in entry:
        for sub in entry['postsub']:
            self.__format_value(sub, value, style)
    self.__stack.append(post)