def lowerupper(s):
    """Split the tokens of ``s`` into a leading run that does not start
    with a capital (after diacritics are stripped) and the remainder.

    Returns a ``(lower, upper)`` pair of token lists; ``upper`` is empty
    when no capital-starting token occurs.
    """
    tokens = [t for t in relu.split(s) if t]
    head = []
    tail = []
    for pos, tok in enumerate(tokens):
        if recapstart.match(undiacritic(tok)):
            # First capital-starting token: everything from here on is "upper".
            tail = tokens[pos:]
            break
        head.append(tok)
    return head, tail
def test_undiacritic():
    """Check that undiacritic strips a LaTeX command wrapper and
    transliterates umlauts/eszett to plain ASCII (Python 2 only)."""
    from pyglottolog.monsterlib._bibtex_undiacritic import undiacritic
    if not PY2:
        # The expected transliteration only holds on Python 2; skip otherwise.
        return
    for i, o in [
        ("\\cmd{äöüß}", "aouss"),
    ]:
        assert_equal(undiacritic(i), o)
def test_undiacritic():
    """LaTeX command wrappers are dropped and umlauts/eszett transliterated
    to ASCII (behavior only asserted under Python 2)."""
    from pyglottolog.monsterlib._bibtex_undiacritic import undiacritic
    if not PY2:
        return  # pragma: no cover
    cases = [
        ("\\cmd{äöüß}", "aouss"),
    ]
    for raw, expected in cases:
        assert_equal(undiacritic(raw), expected)
def keyid(fields, fd, ti=2, infinity=float('inf')):
    """Derive a normalized citation key for a bibtex entry.

    The key is built from the sorted, de-diacriticized author (or editor)
    last names, the ``ti`` least frequent title words (frequency taken
    from ``fd``), an optional roman-numeral volume part, and a 4-char
    year part, all lowercased with non-key characters stripped.

    :param fields: dict of bibtex field name -> value.
    :param fd: mapping word -> corpus frequency used to rank title words.
    :param ti: number of title words to include in the key.
    :param infinity: frequency assumed for words absent from ``fd``.
    """
    if 'author' not in fields:
        if 'editor' not in fields:
            # No contributor at all: fall back to a key derived from every
            # field value (excluding the Glottolog-internal ref id).
            values = ''.join(v for f, v in bibord_iteritems(fields) if f != 'glottolog_ref_id')
            return '__missingcontrib__' + reokkey.sub('_', values.lower())
        else:
            astring = fields['editor']
    else:
        astring = fields['author']
    authors = pauthor(astring)
    if len(authors) != len(astring.split(' and ')):
        # Diagnostic only: parsed author count disagrees with the raw
        # ' and '-separated count, so some name failed to parse.
        print("Unparsed author in", authors)
        print(" ", astring, astring.split(' and '))
        print(fields.get('title'))
    # Author part: sorted last-name sort keys, diacritics removed.
    ak = [
        undiacritic(x) for x in sorted(
            lastnamekey(a['lastname']) for a in authors)
    ]
    # Year part: first four characters of the parsed year.
    yk = pyear(fields.get('year', '[nd]'))[:4]
    tks = wrds(fields.get("title", "no.title"))  # takeuntil :
    # select the (leftmost) two least frequent words from the title
    types = list(unique(w for w in tks if rewrdtok.match(w)))
    tk = nsmallest(ti, types, key=lambda w: fd.get(w, infinity))
    # put them back into the title order (i.e. 'spam eggs' != 'eggs spam')
    order = {w: i for i, w in enumerate(types)}
    tk.sort(key=lambda w: order[w])
    # Volume part only for standalone volumes (not journal/book/series items).
    if 'volume' in fields and all(f not in fields for f in ['journal', 'booktitle', 'series']):
        vk = roman(fields['volume'])
    else:
        vk = ''
    if 'extra_hash' in fields:
        # Disambiguation suffix appended to the year part.
        yk = yk + fields['extra_hash']
    key = '-'.join(ak) + "_" + '-'.join(tk) + vk + yk
    return reokkey.sub("", key.lower())
def keyid(fields, fd, ti=2, infinity=float('inf')):
    """Build a normalized citation key for a bibtex entry.

    Combines sorted de-diacriticized contributor last names, the ``ti``
    rarest title words (ranked by the frequency map ``fd``), an optional
    roman-numeral volume, and a 4-character year, lowercased with
    non-key characters removed.
    """
    if 'author' in fields:
        astring = fields['author']
    elif 'editor' in fields:
        astring = fields['editor']
    else:
        # No contributor: derive a fallback key from all field values,
        # skipping the Glottolog-internal ref id.
        joined = ''.join(
            v for f, v in bibord_iteritems(fields) if f != 'glottolog_ref_id')
        return '__missingcontrib__' + reokkey.sub('_', joined.lower())

    authors = pauthor(astring)
    raw_names = astring.split(' and ')
    if len(authors) != len(raw_names):
        # Diagnostic output: some author name failed to parse.
        print("Unparsed author in", authors)
        print(" ", astring, raw_names)
        print(fields.get('title'))

    sorted_names = sorted(lastnamekey(a['lastname']) for a in authors)
    ak = [undiacritic(name) for name in sorted_names]

    yk = pyear(fields.get('year', '[nd]'))[:4]

    title_tokens = wrds(fields.get("title", "no.title"))
    # takeuntil :
    # select the (leftmost) two least frequent words from the title
    types = list(unique(w for w in title_tokens if rewrdtok.match(w)))
    tk = nsmallest(ti, types, key=lambda w: fd.get(w, infinity))
    # put them back into the title order (i.e. 'spam eggs' != 'eggs spam')
    position = {w: i for i, w in enumerate(types)}
    tk.sort(key=lambda w: position[w])

    # Volume contributes only for standalone volumes (no journal/book/series).
    vk = ''
    if 'volume' in fields:
        if not any(f in fields for f in ('journal', 'booktitle', 'series')):
            vk = roman(fields['volume'])

    if 'extra_hash' in fields:
        yk += fields['extra_hash']

    raw_key = '-'.join(ak) + "_" + '-'.join(tk) + vk + yk
    return reokkey.sub("", raw_key.lower())
def wrds(txt):
    """Tokenize ``txt``: lowercase, strip diacritics and quote characters,
    then split on the module's token separator, dropping empty pieces."""
    cleaned = undiacritic(txt.lower())
    for quote in ("'", '"'):
        cleaned = cleaned.replace(quote, "")
    return [token for token in resplittit.split(cleaned) if token]