예제 #1
0
def lowerupper(s):
    """Split ``s`` into a leading "lower" run and a trailing "upper" run.

    ``s`` is split with the module-level ``relu`` pattern and empty pieces
    are discarded.  The first piece whose ``undiacritic`` form matches
    ``recapstart`` marks the boundary: every piece before it goes into the
    first list, that piece and everything after it into the second.

    Returns a ``(lower, upper)`` tuple of lists; ``upper`` is empty when no
    piece matches ``recapstart``.
    """
    tokens = list(filter(None, relu.split(s)))

    # Default boundary past the end: nothing counts as "upper" unless a
    # matching token is found.
    boundary = len(tokens)
    for idx, token in enumerate(tokens):
        if recapstart.match(undiacritic(token)):
            boundary = idx
            break

    return (tokens[:boundary], tokens[boundary:])
예제 #2
0
def lowerupper(s):
    """Partition the pieces of ``s`` at the first ``recapstart`` match.

    The string is split using ``relu`` (empty pieces dropped).  Pieces are
    scanned left to right; scanning stops at the first piece whose
    ``undiacritic`` form matches ``recapstart``.

    Returns ``(lower, upper)``: the pieces before the stopping point, and
    the stopping piece together with all pieces that follow it (an empty
    list if scanning never stopped).
    """
    pieces = [piece for piece in relu.split(s) if piece]
    # Position of the first matching piece, or len(pieces) if none matches.
    cut = next(
        (pos for pos, piece in enumerate(pieces)
         if recapstart.match(undiacritic(piece))),
        len(pieces))
    head = pieces[:cut]
    tail = pieces[cut:]
    return (head, tail)
예제 #3
0
def keyid(fields, fd={}, ti=2, infinity=float('inf')):
    """Build a normalized identification key for a bibliographic record.

    The key has the form ``<names>_<titlewords><volume><year>``, lower-cased
    and with every match of ``reokkey`` removed.

    :param fields: mapping of field name to value for one record.  The
        ``author`` field is preferred, ``editor`` is the fallback.
    :param fd: mapping of title word to frequency, used to pick the rarest
        title words.  It is only read here, never modified, so the shared
        default dict is harmless.
    :param ti: number of title words to include in the key.
    :param infinity: frequency assumed for words missing from ``fd``.
    :return: the key string.  For records with neither ``author`` nor
        ``editor`` the result is ``'__missingcontrib__'`` followed by the
        lower-cased concatenation of all field values (except
        ``glottolog_ref_id``) with ``reokkey`` matches replaced by ``'_'``.
    """
    if 'author' in fields:
        astring = fields['author']
    elif 'editor' in fields:
        astring = fields['editor']
    else:
        values = ''.join(v for f, v in bibord_iteritems(fields)
                         if f != 'glottolog_ref_id')
        return '__missingcontrib__' + reokkey.sub('_', values.lower())

    authors = pauthor(astring)
    if len(authors) != len(astring.split(' and ')):
        # Diagnostics only.  Single-argument print() calls produce the same
        # output under Python 2 and Python 3.
        print("Unparsed author in %s" % (authors,))
        print("    %s %s" % (astring, astring.split(' and ')))
        # The title is optional (see the default used below), so it must not
        # be indexed directly here.
        print(fields.get('title'))

    ak = [
        undiacritic(x) for x in sorted(
            lastnamekey(a['lastname']) for a in authors)
    ]
    yk = pyear(fields.get('year', '[nd]'))[:4]
    tks = wrds(fields.get("title", "no.title"))
    # select the (leftmost) ``ti`` least frequent words from the title
    types = uniqued(w for w in tks if rewrdtok.match(w))
    tk = nsmallest(ti, types, key=lambda w: fd.get(w, infinity))
    # put them back into the title order (i.e. 'spam eggs' != 'eggs spam')
    order = {w: i for i, w in enumerate(types)}
    tk.sort(key=lambda w: order[w])

    # The volume only contributes when the record has no journal, booktitle
    # or series field.
    in_container = any(
        f in fields for f in ('journal', 'booktitle', 'series'))
    if 'volume' in fields and not in_container:
        vk = roman(fields['volume'])
    else:
        vk = ''

    if 'extra_hash' in fields:
        yk = yk + fields['extra_hash']

    key = '-'.join(ak) + "_" + '-'.join(tk) + vk + yk
    return reokkey.sub("", key.lower())
예제 #4
0
def keyid(fields, fd={}, ti=2, infinity=float('inf')):
    """Return a normalized key identifying a bibliographic record.

    The key is assembled from the sorted contributor last names, the ``ti``
    least frequent title words (kept in title order), an optional volume
    part and the first four characters of the parsed year; it is then
    lower-cased and stripped of everything ``reokkey`` matches.

    :param fields: mapping of field name to value.  ``author`` is used when
        present, otherwise ``editor``.
    :param fd: word-frequency mapping used to rank title words.  Read-only
        in this function, which is why the shared default dict is safe.
    :param ti: how many title words go into the key.
    :param infinity: frequency substituted for words absent from ``fd``.
    :return: the key string; records lacking both ``author`` and ``editor``
        get ``'__missingcontrib__'`` plus the lower-cased, ``reokkey``-
        sanitized concatenation of their values (``glottolog_ref_id``
        excluded).
    """
    if 'author' not in fields and 'editor' not in fields:
        values = ''.join(v for f, v in bibord_iteritems(fields)
                         if f != 'glottolog_ref_id')
        return '__missingcontrib__' + reokkey.sub('_', values.lower())
    astring = fields['author'] if 'author' in fields else fields['editor']

    authors = pauthor(astring)
    raw_names = astring.split(' and ')
    if len(authors) != len(raw_names):
        # Diagnostic output; one pre-formatted argument per print() keeps
        # the text identical on Python 2 and Python 3.
        print("Unparsed author in %s" % (authors,))
        print("    %s %s" % (astring, raw_names))
        # A record may have no title, so avoid a KeyError while reporting.
        print(fields.get('title'))

    ak = [undiacritic(x)
          for x in sorted(lastnamekey(a['lastname']) for a in authors)]
    yk = pyear(fields.get('year', '[nd]'))[:4]
    tks = wrds(fields.get("title", "no.title"))
    # select the (leftmost) ``ti`` least frequent words from the title
    types = uniqued(w for w in tks if rewrdtok.match(w))
    tk = nsmallest(ti, types, key=lambda w: fd.get(w, infinity))
    # put them back into the title order (i.e. 'spam eggs' != 'eggs spam')
    order = {w: i for i, w in enumerate(types)}
    tk.sort(key=lambda w: order[w])

    # Use the volume only for records without journal/booktitle/series.
    if ('volume' in fields
            and 'journal' not in fields
            and 'booktitle' not in fields
            and 'series' not in fields):
        vk = roman(fields['volume'])
    else:
        vk = ''

    if 'extra_hash' in fields:
        yk = yk + fields['extra_hash']

    key = '-'.join(ak) + "_" + '-'.join(tk) + vk + yk
    return reokkey.sub("", key.lower())
예제 #5
0
def wrds(txt):
    """Break ``txt`` into a list of normalized word tokens.

    The text is lower-cased, passed through ``undiacritic``, stripped of
    single and double quote characters, and split with the module-level
    ``resplittit`` pattern.  Empty pieces are dropped.
    """
    normalized = undiacritic(txt.lower())
    for quote in ("'", '"'):
        normalized = normalized.replace(quote, "")
    return [token for token in resplittit.split(normalized) if token]
예제 #6
0
def wrds(txt):
    """Return the non-empty pieces of ``txt`` after normalization.

    Normalization means: lower-case the input, apply ``undiacritic``, and
    delete every ``'`` and ``"`` character.  The result is then split using
    ``resplittit`` and falsy pieces are filtered out.
    """
    cleaned = undiacritic(txt.lower()).replace("'", "").replace('"', "")
    pieces = resplittit.split(cleaned)
    # filter(None, ...) keeps only truthy pieces, like ``if x`` would.
    return list(filter(None, pieces))