Python normalize Examples, catalog.merge.normalize.normalize Python Examples

Example #1

0

Show file

File: __init__.py Project: candeira/openlibrary

def mk_norm(title):
    m = re_brackets.match(title)
    if m:
        title = m.group(1)
    norm = merge.normalize(title).strip(' ')
    norm = norm.replace(' and ', ' ')
    if norm.startswith('the '):
        norm = norm[4:]
    elif norm.startswith('a '):
        norm = norm[2:]
    return norm.replace('-', '').replace(' ', '')

Example #2

0

Show file

File: tests.py Project: vkandola/openlibrary

def mk_norm(title):
    m = re_brackets.match(title)
    if m:
        title = m.group(1)
    norm = merge.normalize(title).strip(' ')
    norm = norm.replace(' and ', ' ')
    if norm.startswith('the '):
        norm = norm[4:]
    elif norm.startswith('a '):
        norm = norm[2:]
    return norm.replace('-', '').replace(' ', '')

Example #3

0

Show file

File: web_ui.py Project: RaceList/openlibrary

def get_title_to_key(author):
    # get id to key mapping of all editions by author
    author_keys = get_author_keys(author)
    if not author_keys:
        return {}

    # get title to key mapping of all editions by author
    t0 = time()
    sql = "select key, value as title from thing, edition_str " \
        + "where thing.id = thing_id and key_id=3 and thing_id in (" \
        + "select thing_id from edition_ref, thing " \
        + "where edition_ref.key_id=11 and edition_ref.value = thing.id and thing.key in (" + author_keys + "))"
    print sql
    return {}
    title_to_key = {}
    for r in web.query(sql):
        t = normalize(r.title).strip('.')
        title_to_key.setdefault(t, []).append(r.key)
    return title_to_key

Example #4

0

Show file

def get_title_to_key(author):
    # get id to key mapping of all editions by author
    author_keys = get_author_keys(author)
    if not author_keys:
        return {}

    # get title to key mapping of all editions by author
    t0 = time()
    sql = "select key, value as title from thing, edition_str " \
        + "where thing.id = thing_id and key_id=3 and thing_id in (" \
        + "select thing_id from edition_ref, thing " \
        + "where edition_ref.key_id=11 and edition_ref.value = thing.id and thing.key in (" + author_keys + "))"
    print sql
    return {}
    title_to_key = {}
    for r in web.query(sql):
        t = normalize(r.title).strip('.')
        title_to_key.setdefault(t, []).append(r.key)
    return title_to_key

Example #5

0

Show file

File: web_ui.py Project: RaceList/openlibrary

def search(title, author):

    title_to_key = get_title_to_key(author)
    norm_title = normalize(title).strip('.')

    if norm_title not in title_to_key:
        print 'title not found'
        return

    pool = set(title_to_key[norm_title])

    editions = []
    seen = set()
    found_titles = {}
    found_isbn = {}
    while pool:
        key = pool.pop()
        seen.add(key)
        e = site.withKey(key)
        translation_of = None
        if False and e.notes:
            m = re_translation_of.search(e.notes)
            if m:
                translation_of = m.group(1).lower()
                pool.update(k for k in title_to_key[translation_of] if k not in seen)
                found_titles.setdefault(translation_of, []).append(key)
        if False and e.isbn_10:
            for i in e.isbn_10:
                found_isbn.setdefault(i, []).append(key)
            join_isbn = ', '.join(map(isbn_link, e.isbn_10))
        else:
            join_isbn = ''
        rec = {
            'key': key,
            'publish_date': e.publish_date,
            'publishers': ', '.join(p.encode('utf-8') for p in (e.publishers or [])),
            'isbn': join_isbn,
        }
        editions.append(rec)

        if e.work_titles:
            for t in e.work_titles:
                t=t.strip('.')
                pool.update(k for k in title_to_key.get(t.lower(), []) if k not in seen)
                found_titles.setdefault(t, []).append(key)
        if e.other_titles:
            for t in e.other_titles:
                t=t.strip('.')
                pool.update(k for k in title_to_key.get(t.lower(), []) if k not in seen)
                found_titles.setdefault(t, []).append(key)

    print '<table>'
    for e in sorted(editions, key=lambda e: e['publish_date'] and e['publish_date'][-4:]):
        print '<tr>'
        print '<td>', ol_link(e['key'])
        print '<td>', e['publish_date'], '</td><td>', e['publishers'], '</td>'
        print '<td>', e['isbn'], '</td>'
        print '</tr>'
    print '</table>'

    if found_titles:
        print '<h2>Other titles</h2>'
        print '<ul>'
        for k, v in found_titles.iteritems():
            if k == title:
                continue
            print '<li><a href="/?title=%s&author=%s">%s</a>' % (k, author, k),
            print 'from', ', '.join(ol_link(i) for i in v)
        print '</ul>'

    extra_isbn = {}
    for k, v in found_isbn.iteritems():
        for isbn, note in find_others(k, rc['amazon_other_editions']):
            if note.lower().find('audio') != -1:
                continue
            if isbn not in found_isbn:
                extra_isbn.setdefault(isbn, []).extend(v)

    if extra_isbn:
        print '<h2>Other ISBN</h2>'
        print '<ul>'
        for k in sorted(extra_isbn):
            print '<li>', isbn_link(k),
            print 'from', ', '.join(ol_link(i) for i in extra_isbn[k])
        print '</ul>'

Example #6

0

Show file

def search(title, author):

    title_to_key = get_title_to_key(author)
    norm_title = normalize(title).strip('.')

    if norm_title not in title_to_key:
        print 'title not found'
        return

    pool = set(title_to_key[norm_title])

    editions = []
    seen = set()
    found_titles = {}
    found_isbn = {}
    while pool:
        key = pool.pop()
        seen.add(key)
        e = site.withKey(key)
        translation_of = None
        if False and e.notes:
            m = re_translation_of.search(e.notes)
            if m:
                translation_of = m.group(1).lower()
                pool.update(k for k in title_to_key[translation_of]
                            if k not in seen)
                found_titles.setdefault(translation_of, []).append(key)
        if False and e.isbn_10:
            for i in e.isbn_10:
                found_isbn.setdefault(i, []).append(key)
            join_isbn = ', '.join(map(isbn_link, e.isbn_10))
        else:
            join_isbn = ''
        rec = {
            'key':
            key,
            'publish_date':
            e.publish_date,
            'publishers':
            ', '.join(p.encode('utf-8') for p in (e.publishers or [])),
            'isbn':
            join_isbn,
        }
        editions.append(rec)

        if e.work_titles:
            for t in e.work_titles:
                t = t.strip('.')
                pool.update(k for k in title_to_key.get(t.lower(), [])
                            if k not in seen)
                found_titles.setdefault(t, []).append(key)
        if e.other_titles:
            for t in e.other_titles:
                t = t.strip('.')
                pool.update(k for k in title_to_key.get(t.lower(), [])
                            if k not in seen)
                found_titles.setdefault(t, []).append(key)

    print '<table>'
    for e in sorted(
            editions,
            key=lambda e: e['publish_date'] and e['publish_date'][-4:]):
        print '<tr>'
        print '<td>', ol_link(e['key'])
        print '<td>', e['publish_date'], '</td><td>', e['publishers'], '</td>'
        print '<td>', e['isbn'], '</td>'
        print '</tr>'
    print '</table>'

    if found_titles:
        print '<h2>Other titles</h2>'
        print '<ul>'
        for k, v in found_titles.iteritems():
            if k == title:
                continue
            print '<li><a href="/?title=%s&author=%s">%s</a>' % (k, author, k),
            print 'from', ', '.join(ol_link(i) for i in v)
        print '</ul>'

    extra_isbn = {}
    for k, v in found_isbn.iteritems():
        for isbn, note in find_others(k, rc['amazon_other_editions']):
            if note.lower().find('audio') != -1:
                continue
            if isbn not in found_isbn:
                extra_isbn.setdefault(isbn, []).extend(v)

    if extra_isbn:
        print '<h2>Other ISBN</h2>'
        print '<ul>'
        for k in sorted(extra_isbn):
            print '<li>', isbn_link(k),
            print 'from', ', '.join(ol_link(i) for i in extra_isbn[k])
        print '</ul>'