def find_entity(author):
    """Return the stored author record that matches ``author``, or None.

    ``author`` is a dict.  This function reads ``author['name']`` and
    ``author['entity_type']`` and tests for a ``'birth_date'`` key; the
    dict is also handed to ``author_dates_match`` and ``pick_from_matches``.

    When ``entity_type`` is not ``'person'`` the first key returned by
    ``find_author(name)`` is loaded with ``withKey`` (following one
    redirect) and returned without any date comparison.

    For a person, candidate keys come from ``find_author(name)`` plus,
    when the name contains ``', '``, ``find_author(flip_name(name))``.
    Each candidate is loaded once, one redirect is followed, deleted
    records are skipped, and the record is kept only if it agrees with
    ``author`` on whether a birth date is present and
    ``author_dates_match`` accepts it.  A single survivor is returned
    directly; several are passed to ``pick_from_matches``.

    Raises:
        AssertionError: a loaded record is neither an author, a redirect
            (person and non-person paths) nor a deleted record (person
            path only).
        ValueError: re-raised from ``pick_from_matches`` after printing
            the author and the candidate list.
    """
    name = author['name']
    things = find_author(name)

    if author['entity_type'] != 'person':
        if not things:
            return None
        db_entity = withKey(things[0])
        if db_entity['type']['key'] == '/type/redirect':
            db_entity = withKey(db_entity['location'])
        # Raised explicitly (not via ``assert``) so the check is still
        # performed when Python runs with -O.
        if db_entity['type']['key'] != '/type/author':
            raise AssertionError(
                'expected /type/author, got %r' % (db_entity['type']['key'],))
        return db_entity

    if ', ' in name:
        things += find_author(flip_name(name))

    matches = []
    seen = set()
    for key in things:
        if key in seen:
            continue
        seen.add(key)
        db_entity = withKey(key)

        if db_entity['type']['key'] == '/type/redirect':
            # Follow the redirect, unless its target was already handled.
            key = db_entity['location']
            if key in seen:
                continue
            seen.add(key)
            db_entity = withKey(key)

        entity_type = db_entity['type']['key']
        if entity_type == '/type/delete':
            continue
        if entity_type != '/type/author':
            # Dump what was found before failing, to help debugging.
            print('%s %s %s' % (name, key, db_entity))
            raise AssertionError(
                'expected /type/author, got %r' % (entity_type,))

        # Both sides must agree on whether a birth date is known.
        if ('birth_date' in author) != ('birth_date' in db_entity):
            continue
        if not author_dates_match(author, db_entity):
            continue
        matches.append(db_entity)

    if not matches:
        return None
    if len(matches) == 1:
        return matches[0]
    try:
        return pick_from_matches(author, matches)
    except ValueError:
        print('author: %s' % (author,))
        print('match: %s' % (matches,))
        raise
def build_query(loc, rec):
    """Build the edition dict for the import record ``rec``.

    The result starts as ``{'type': {'key': '/type/edition'}}`` and then
    receives one entry per key of ``rec``:

    * ``'authors'`` becomes a one-element list holding
      ``import_author(v[0], eastern=east)``; only the first author is used.
    * keys present in ``type_map`` have their value (or each item, when
      the value is a list) wrapped as
      ``{'type': '/type/' + type_map[k], 'value': ...}``.
    * every other key is copied unchanged.

    Side effects: ``rec`` is modified in place.  Entries with key
    ``'/languages/zxx'`` are dropped from ``rec['languages']`` and a fixed
    set of misspelled language keys is rewritten.  Debug output is
    printed along the way.

    Args:
        loc: not used by this function.
        rec: dict describing the edition being imported.

    Raises:
        AssertionError: the first ``table_of_contents`` entry is a list,
            ``withKey`` returns a false value for a language key, or the
            resulting dict has no ``'title'``.
    """
    # Misspelled language keys seen in input records -> the key to use.
    language_key_fixes = {
        '/languages/ser': '/languages/srp',
        '/languages/end': '/languages/eng',
        '/languages/enk': '/languages/eng',
        '/languages/ent': '/languages/eng',
        # NOTE(review): 'chu' is the MARC code for Church Slavic; confirm
        # this is the intended target for 'cro'.
        '/languages/cro': '/languages/chu',
        '/languages/jap': '/languages/jpn',
        '/languages/fra': '/languages/fre',
        '/languages/gwr': '/languages/ger',
        '/languages/fr ': '/languages/fre',
        '/languages/it ': '/languages/ita',
        '/languages/fle': '/languages/dut',  # flemish -> dutch
    }

    # Validation raises explicitly (not via ``assert``) so that it is
    # still performed when Python runs with -O.
    if 'table_of_contents' in rec:
        if isinstance(rec['table_of_contents'][0], list):
            raise AssertionError('table_of_contents entries must not be lists')

    book = {
        'type': {'key': '/type/edition'},
    }

    try:
        east = east_in_by_statement(rec)
    except Exception:
        # Show the offending record before propagating the error.
        pprint(rec)
        raise
    if east:
        print(rec)

    langs = rec.get('languages', [])
    print(langs)
    if any(lang['key'] == '/languages/zxx' for lang in langs):
        print('zxx found in langs')
        rec['languages'] = [
            lang for lang in langs if lang['key'] != '/languages/zxx'
        ]
        # Report the filtered list, not the stale ``langs`` reference.
        print('fixed: %s' % (rec['languages'],))

    for lang in rec.get('languages', []):
        print(lang)
        if lang['key'] in language_key_fixes:
            lang['key'] = language_key_fixes[lang['key']]
        if not withKey(lang['key']):
            raise AssertionError('unknown language key: %r' % (lang['key'],))

    for k, v in rec.iteritems():
        if k == 'authors':
            book[k] = [import_author(v[0], eastern=east)]
        elif k in type_map:
            t = '/type/' + type_map[k]
            if isinstance(v, list):
                book[k] = [{'type': t, 'value': i} for i in v]
            else:
                book[k] = {'type': t, 'value': v}
        else:
            book[k] = v

    if 'title' not in book:
        raise AssertionError('record has no title')
    return book
def build_query(loc, rec):
    """Build the edition dict for the import record ``rec``.

    The result starts as ``{'type': {'key': '/type/edition'}}`` and then
    receives one entry per key of ``rec``:

    * ``'authors'`` becomes a one-element list holding
      ``import_author(v[0], eastern=east)``; only the first author is used.
    * keys present in ``type_map`` have their value (or each item, when
      the value is a list) wrapped as
      ``{'type': '/type/' + type_map[k], 'value': ...}``.
    * every other key is copied unchanged.

    Side effects: entries with key ``'/languages/zxx'`` are dropped from
    ``rec['languages']`` in place, and debug output is printed.

    Args:
        loc: not used by this function.
        rec: dict describing the edition being imported.

    Raises:
        AssertionError: the first ``table_of_contents`` entry is a list,
            ``withKey`` returns a false value for a language key, or the
            resulting dict has no ``'title'``.
    """
    # Validation raises explicitly (not via ``assert``) so that it is
    # still performed when Python runs with -O.
    if 'table_of_contents' in rec:
        if isinstance(rec['table_of_contents'][0], list):
            raise AssertionError('table_of_contents entries must not be lists')

    book = {
        'type': {'key': '/type/edition'},
    }

    try:
        east = east_in_by_statement(rec)
    except Exception:
        # Show the offending record before propagating the error.
        pprint(rec)
        raise
    if east:
        print(rec)

    langs = rec.get('languages', [])
    print(langs)
    if any(lang['key'] == '/languages/zxx' for lang in langs):
        print('zxx found in langs')
        rec['languages'] = [
            lang for lang in langs if lang['key'] != '/languages/zxx'
        ]
        # Report the filtered list, not the stale ``langs`` reference.
        print('fixed: %s' % (rec['languages'],))

    for lang in rec.get('languages', []):
        # Look the language up by its key string, not by the whole dict.
        if not withKey(lang['key']):
            raise AssertionError('unknown language key: %r' % (lang['key'],))

    for k, v in rec.iteritems():
        if k == 'authors':
            book[k] = [import_author(v[0], eastern=east)]
        elif k in type_map:
            t = '/type/' + type_map[k]
            if isinstance(v, list):
                book[k] = [{'type': t, 'value': i} for i in v]
            else:
                book[k] = {'type': t, 'value': v}
        else:
            book[k] = v

    if 'title' not in book:
        raise AssertionError('record has no title')
    return book
def build_query(loc, rec):
    """Build the edition dict for the import record ``rec``.

    The result starts as ``{'type': {'key': '/type/edition'}}`` and then
    receives one entry per key of ``rec``:

    * ``'authors'`` becomes a one-element list holding
      ``import_author(v[0], eastern=east)``; only the first author is used.
    * keys present in ``type_map`` have their value (or each item, when
      the value is a list) wrapped as
      ``{'type': '/type/' + type_map[k], 'value': ...}``.
    * every other key is copied unchanged.

    Side effects: ``rec`` is modified in place.  Entries with key
    ``'/languages/zxx'`` are dropped from ``rec['languages']`` and a fixed
    set of misspelled language keys is rewritten.  Debug output is
    printed along the way.

    Args:
        loc: not used by this function.
        rec: dict describing the edition being imported.

    Raises:
        AssertionError: the first ``table_of_contents`` entry is a list,
            ``withKey`` returns a false value for a language key, or the
            resulting dict has no ``'title'``.
    """
    # Misspelled language keys seen in input records -> the key to use.
    language_key_fixes = {
        '/languages/ser': '/languages/srp',
        '/languages/end': '/languages/eng',
        '/languages/enk': '/languages/eng',
        '/languages/ent': '/languages/eng',
        '/languages/enb': '/languages/eng',
        '/languages/emg': '/languages/eng',
        # NOTE(review): 'chu' is the MARC code for Church Slavic; confirm
        # this is the intended target for 'cro'.
        '/languages/cro': '/languages/chu',
        '/languages/jap': '/languages/jpn',
        '/languages/fra': '/languages/fre',
        '/languages/ila': '/languages/ita',
        '/languages/gwr': '/languages/ger',
        '/languages/fr ': '/languages/fre',
        '/languages/it ': '/languages/ita',
        '/languages/fle': '/languages/dut',  # flemish -> dutch
    }

    # Validation raises explicitly (not via ``assert``) so that it is
    # still performed when Python runs with -O.
    if 'table_of_contents' in rec:
        if isinstance(rec['table_of_contents'][0], list):
            raise AssertionError('table_of_contents entries must not be lists')

    book = {
        'type': {'key': '/type/edition'},
    }

    try:
        east = east_in_by_statement(rec)
    except Exception:
        # Show the offending record before propagating the error.
        pprint(rec)
        raise
    if east:
        print(rec)

    langs = rec.get('languages', [])
    print(langs)
    if any(lang['key'] == '/languages/zxx' for lang in langs):
        print('zxx found in langs')
        rec['languages'] = [
            lang for lang in langs if lang['key'] != '/languages/zxx'
        ]
        # Report the filtered list, not the stale ``langs`` reference.
        print('fixed: %s' % (rec['languages'],))

    for lang in rec.get('languages', []):
        print(lang)
        if lang['key'] in language_key_fixes:
            lang['key'] = language_key_fixes[lang['key']]
        if not withKey(lang['key']):
            raise AssertionError('unknown language key: %r' % (lang['key'],))

    for k, v in rec.iteritems():
        if k == 'authors':
            book[k] = [import_author(v[0], eastern=east)]
        elif k in type_map:
            t = '/type/' + type_map[k]
            if isinstance(v, list):
                book[k] = [{'type': t, 'value': i} for i in v]
            else:
                book[k] = {'type': t, 'value': v}
        else:
            book[k] = v

    if 'title' not in book:
        raise AssertionError('record has no title')
    return book