def find_entity(author):
    """Find the database author record that corresponds to ``author``.

    ``author`` is a dict that must contain ``'name'`` and ``'entity_type'``;
    it is also passed to ``author_dates_match`` and ``pick_from_matches``,
    and its ``'birth_date'`` key is consulted when present.

    Non-person entities: the first key returned by ``find_author(name)`` is
    loaded with ``withKey`` (following one redirect) and returned.

    Persons: every candidate key found under ``name`` -- and under
    ``flip_name(name)`` when the name contains ``', '`` -- is loaded once
    (redirects followed, deleted records skipped). A candidate is kept only
    when it agrees with ``author`` on whether a ``'birth_date'`` is present
    and ``author_dates_match`` accepts it.

    Returns ``None`` when nothing matches, the single surviving record when
    exactly one matches, otherwise the result of ``pick_from_matches``.

    Raises ``AssertionError`` when a loaded record is neither a redirect, a
    deleted record, nor a ``/type/author``. A ``ValueError`` raised by
    ``pick_from_matches`` is re-raised after the inputs have been printed.
    """
    name = author['name']
    things = find_author(name)

    if author['entity_type'] != 'person':
        if not things:
            return None
        db_entity = withKey(things[0])
        if db_entity['type']['key'] == '/type/redirect':
            db_entity = withKey(db_entity['location'])
        # Explicit raise instead of ``assert`` so the check survives ``-O``.
        if db_entity['type']['key'] != '/type/author':
            raise AssertionError(
                'expected /type/author, got %s' % (db_entity['type']['key'],))
        return db_entity

    if ', ' in name:
        # Extend a copy: ``+=`` would modify the list object handed back by
        # find_author in place.
        things = list(things)
        things.extend(find_author(flip_name(name)))

    match = []
    seen = set()
    for key in things:
        if key in seen:
            continue
        seen.add(key)
        db_entity = withKey(key)

        if db_entity['type']['key'] == '/type/redirect':
            # Follow the redirect, but do not revisit a target that was
            # already examined under its own key.
            key = db_entity['location']
            if key in seen:
                continue
            seen.add(key)
            db_entity = withKey(key)

        entity_type = db_entity['type']['key']
        if entity_type == '/type/delete':
            continue
        if entity_type != '/type/author':
            # Same diagnostic line the original printed before re-raising.
            print('%s %s %r' % (name, key, db_entity))
            raise AssertionError(
                '%s has type %s, expected /type/author' % (key, entity_type))

        # Both sides must agree on whether a birth date is known at all.
        if ('birth_date' in author) != ('birth_date' in db_entity):
            continue
        if not author_dates_match(author, db_entity):
            continue
        match.append(db_entity)

    if not match:
        return None
    if len(match) == 1:
        return match[0]
    try:
        return pick_from_matches(author, match)
    except ValueError:
        print('author: %r' % (author,))
        print('match: %r' % (match,))
        raise
def _edition_query(author_key):
    """Return the query selecting the editions whose author is ``author_key``."""
    return {
        'type': '/type/edition',
        'authors': author_key,
        'title_prefix': None,
        'title': None,
        'isbn_10': None,
    }


def _full_title(edition):
    """Return the edition's title with its optional prefix prepended."""
    return (edition['title_prefix'] or '') + edition['title']


# NOTE(review): this file defines ``search`` a second time further down; the
# later definition is the one bound at import time -- confirm which is wanted.
def search(author, name):
    """Build an HTML report comparing an author's editions with MARC data.

    ``author`` is the value placed in the ``'authors'`` field of the edition
    query (presumably an author key -- confirm against callers); ``name`` is
    the author name used for the MARC lookup and the author search.

    Steps:

    1. Fetch the editions for ``author`` and, for each one, collect MARC
       locations via ``search_query('isbn', ...)`` for every ISBN-10.
    2. Ask ``data_from_marc`` which author the MARC records name. Only when
       it reports exactly one author is that author attached to the edition
       and its first two fields added to the set of names to search.
    3. Run ``author_search`` for every name (plus a "Last, First" variant of
       each name that has no comma) and load each hit's editions.
    4. For every edition with a MARC author, look for the single searched
       author whose dates satisfy ``author_dates_match``.

    Returns an HTML string: a ``<ul>`` of the searched authors (omitted when
    there are none) followed by a ``<table>`` with one row per edition.
    """
    editions = list(query_iter(_edition_query(author)))
    names = set([name])
    books = []
    for e in editions:
        locs = set()
        for isbn in e['isbn_10'] or []:
            locs.update(search_query('isbn', isbn))
        if not locs:
            books.append((e['key'], _full_title(e), None, []))
            continue

        marc = data_from_marc(locs, name)
        if len(marc) != 1:
            # No author or several authors: record the edition without one.
            # NOTE(review): each value is appended whole here, whereas the
            # single-author path below iterates the value item by item --
            # confirm whether these should be flattened.
            books.append((e['key'], _full_title(e), None, list(marc.values())))
            continue

        # Exactly one entry; works on both Python 2 and 3 dicts.
        marc_author, locs = next(iter(marc.items()))
        names.update(marc_author[0:2])
        books.append((e['key'], _full_title(e), marc_author, locs))

    # Add a "Last, First" form for every name written as "First Last".
    flipped = set()
    for n in names:
        if ', ' in n:
            continue
        i = n.rfind(' ')
        if i == -1:
            # Single-word name: there is nothing to flip, and slicing with
            # -1 would yield a truncated garbage name.
            continue
        flipped.add("%s, %s" % (n[i+1:], n[:i]))
    names.update(flipped)

    authors = [a for n in names for a in author_search(n)]
    for a in authors:
        a['editions'] = list(query_iter(_edition_query(a['key'])))

    # Cache of MARC date tuple -> uniquely matching author (or None).
    author_map = {}
    rows = []
    for key, title, marc_author, locs in books:
        links = ', '.join(
            '<a href="http://openlibrary.org/show-marc/%s">%s</a>' % (loc, loc)
            for loc in locs)
        row = ('<tr><td><a href="http://openlibrary.org' + key + '">'
               + web.htmlquote(title) + '</a>'
               + '<br>' + links + '</td>')
        if marc_author:
            marc_dates = marc_author[2]
            # Always rebuilt for the current row: reusing the value left
            # over from an earlier row would print another author's dates
            # whenever the lookup result is served from author_map.
            dates = {
                'birth_date': marc_dates[0],
                'death_date': marc_dates[1],
                'dates': marc_dates[2],
            }
            if marc_dates not in author_map:
                db_match = [db for db in authors
                            if author_dates_match(dates, db)]
                author_map[marc_dates] = (
                    db_match[0] if len(db_match) == 1 else None)
            match = author_map[marc_dates]
            if match:
                row += '<td><a href="http://openlibrary.org%s">%s-%s</a></td>' % (
                    match['key'],
                    match['birth_date'] or '',
                    match['death_date'] or '')
            else:
                row += '<td>%s-%s (no match)</td>' % (
                    dates['birth_date'] or '', dates['death_date'] or '')
        row += '</tr>\n'
        rows.append(row)

    ret = ''
    if authors:
        # NOTE(review): every entry is labelled with the searched ``name``,
        # not a name taken from the author record -- confirm this is wanted.
        items = [
            '<li><a href="http://openlibrary.org%s">%s</a> (%s-%s) %d editions' % (
                a['key'], web.htmlquote(name),
                a['birth_date'] or '', a['death_date'] or '',
                len(a['editions']))
            for a in authors
        ]
        ret = '<ul>' + ''.join(items) + '</ul>'

    return ret + '<table>' + ''.join(rows) + '</table>'
def _edition_query(author_key):
    """Return the query selecting the editions whose author is ``author_key``."""
    return {
        'type': '/type/edition',
        'authors': author_key,
        'title_prefix': None,
        'title': None,
        'isbn_10': None,
    }


def _full_title(edition):
    """Return the edition's title with its optional prefix prepended."""
    return (edition['title_prefix'] or '') + edition['title']


# NOTE(review): an earlier definition of ``search`` appears above in this
# file; this later one replaces it at import time -- confirm the duplicate
# is intentional.
def search(author, name):
    """Build an HTML report comparing an author's editions with MARC data.

    ``author`` is the value placed in the ``'authors'`` field of the edition
    query (presumably an author key -- confirm against callers); ``name`` is
    the author name used for the MARC lookup and the author search.

    Steps:

    1. Fetch the editions for ``author`` and, for each one, collect MARC
       locations via ``search_query('isbn', ...)`` for every ISBN-10.
    2. Ask ``data_from_marc`` which author the MARC records name. Only when
       it reports exactly one author is that author attached to the edition
       and its first two fields added to the set of names to search.
    3. Run ``author_search`` for every name (plus a "Last, First" variant of
       each name that has no comma) and load each hit's editions.
    4. For every edition with a MARC author, look for the single searched
       author whose dates satisfy ``author_dates_match``.

    Returns an HTML string: a ``<ul>`` of the searched authors (omitted when
    there are none) followed by a ``<table>`` with one row per edition.
    """
    editions = list(query_iter(_edition_query(author)))
    names = set([name])
    books = []
    for e in editions:
        locs = set()
        for isbn in e['isbn_10'] or []:
            locs.update(search_query('isbn', isbn))
        if not locs:
            books.append((e['key'], _full_title(e), None, []))
            continue

        marc = data_from_marc(locs, name)
        if len(marc) != 1:
            # No author or several authors: record the edition without one.
            # NOTE(review): each value is appended whole here, whereas the
            # single-author path below iterates the value item by item --
            # confirm whether these should be flattened.
            books.append((e['key'], _full_title(e), None, list(marc.values())))
            continue

        # Exactly one entry; works on both Python 2 and 3 dicts.
        marc_author, locs = next(iter(marc.items()))
        names.update(marc_author[0:2])
        books.append((e['key'], _full_title(e), marc_author, locs))

    # Add a "Last, First" form for every name written as "First Last".
    flipped = set()
    for n in names:
        if ', ' in n:
            continue
        i = n.rfind(' ')
        if i == -1:
            # Single-word name: there is nothing to flip, and slicing with
            # -1 would yield a truncated garbage name.
            continue
        flipped.add("%s, %s" % (n[i+1:], n[:i]))
    names.update(flipped)

    authors = [a for n in names for a in author_search(n)]
    for a in authors:
        a['editions'] = list(query_iter(_edition_query(a['key'])))

    # Cache of MARC date tuple -> uniquely matching author (or None).
    author_map = {}
    rows = []
    for key, title, marc_author, locs in books:
        links = ', '.join(
            '<a href="http://openlibrary.org/show-marc/%s">%s</a>' % (loc, loc)
            for loc in locs)
        row = ('<tr><td><a href="http://openlibrary.org' + key + '">'
               + web.htmlquote(title) + '</a>'
               + '<br>' + links + '</td>')
        if marc_author:
            marc_dates = marc_author[2]
            # Always rebuilt for the current row: reusing the value left
            # over from an earlier row would print another author's dates
            # whenever the lookup result is served from author_map.
            dates = {
                'birth_date': marc_dates[0],
                'death_date': marc_dates[1],
                'dates': marc_dates[2],
            }
            if marc_dates not in author_map:
                db_match = [db for db in authors
                            if author_dates_match(dates, db)]
                author_map[marc_dates] = (
                    db_match[0] if len(db_match) == 1 else None)
            match = author_map[marc_dates]
            if match:
                row += '<td><a href="http://openlibrary.org%s">%s-%s</a></td>' % (
                    match['key'],
                    match['birth_date'] or '',
                    match['death_date'] or '')
            else:
                row += '<td>%s-%s (no match)</td>' % (
                    dates['birth_date'] or '', dates['death_date'] or '')
        row += '</tr>\n'
        rows.append(row)

    ret = ''
    if authors:
        # NOTE(review): every entry is labelled with the searched ``name``,
        # not a name taken from the author record -- confirm this is wanted.
        items = [
            '<li><a href="http://openlibrary.org%s">%s</a> (%s-%s) %d editions' % (
                a['key'], web.htmlquote(name),
                a['birth_date'] or '', a['death_date'] or '',
                len(a['editions']))
            for a in authors
        ]
        ret = '<ul>' + ''.join(items) + '</ul>'

    return ret + '<table>' + ''.join(rows) + '</table>'