def from_string(cls, string):
    """Build an instance from a personal name written as free text.

    Four layouts are tried in order and the first one that matches wins:

        Last, First Middle    (Auden, Wystan Hugh)
        Last, First           (Auden, Wystan)
        First Middle Last     (Wystan Hugh Auden)
        First Last            (Wystan Auden)

    Args:
        string: the name to parse.

    Returns:
        ``cls(first, middle, last)``. ``middle`` is ``None`` when the
        matching layout has no middle-name group.

    Raises:
        ValueError: if none of the layouts matches ``string``.
    """
    # `named`, `some` and `end` are regex-building helpers imported from
    # xdoc.lib.regex; presumably named(n, p) wraps p in a (?P<n>...) group
    # -- confirm against that module.
    first = named('first', some)
    middle = named('middle', some)
    last = named('last', some)

    # Raw strings: '\s' inside a plain literal is an invalid string escape
    # that newer interpreters warn about. The runtime value is unchanged.
    gap = r'\s+'
    comma = r',\s+'

    # Order matters: the layouts with a middle name are tried before their
    # shorter counterparts.
    patterns = [
        last + comma + first + gap + middle + end,  # Auden, Wystan Hugh
        last + comma + first + end,                 # Auden, Wystan
        first + gap + middle + gap + last + end,    # Wystan Hugh Auden
        first + gap + last + end,                   # Wystan Auden
    ]

    for pattern in patterns:
        match = re.match(pattern, string)
        if match:
            groups = match.groupdict()
            # .get() because only some layouts define a 'middle' group.
            return cls(groups['first'], groups.get('middle'), groups['last'])

    # ValueError subclasses Exception, so existing `except Exception`
    # handlers keep working while callers can now catch something narrower.
    raise ValueError('Cannot parse name: "%s"' % string)
import re import requests from xdoc.dom import Author, Reference from xdoc.lib.regex import named, maybe, anything, some, s, sep, end from xdoc.formats.tex.bibliography import parse_bibtex from unidecode import unidecode from xdoc.lib.log import logging logger = logging.getLogger(__name__) # \((\d{4}\w?,?)+\)/ # re_authors = r'(?P<authors>.+?)\s*' # re_authors_editors = r'(?P<authors>.+?)\s*(?P<editor>\(ed(itor)?s\.?\)\s+)?\s*' re_editors = r'(?P<editor>.+?)\s*\(ed(itor)?s?\.?\)\s*' re_year = named('year', '\d{4}') + named('subyear', r'\w?') re_title = r'(?P<title>[^.]+)\.\s*' re_title_i = r'(?P<title>.+?)[.,]?\s*' # \u2013 is the en-dash re_page = ur'(?P<page_begin>\d+)(-|--|\u2013)(?P<page_end>\d+)' # :?\s*' + re_page + ' re_vol = r'(Volume\s+)?(?P<volume>\d+(\.\d+)?)' re_edition = r'\((?P<edition>\d+)\)' re_pub_address = r'(?P<publisher>[^,]+)([.,]|, (?P<address>.*[^.])[.,]?)\s*' re_doi = r'(http://dx.doi.org/(?P<doi>\S+))?' media_regex = [{ # Horn, Larry. 1972. On the semantic properties of logical operators in English: UCLA dissertation. 'medium': 'phdthesis', 'pattern':