def identify_doubles(doubles=doubles, repo=repo):
    """Identify the persons behind each group of duplicate biographies.

    For every group in ``doubles`` the biographies are looked up by local
    id, the person of each biography is fetched, and the first person of
    the group is passed to ``repo.identify`` together with each of the
    remaining persons in turn.

    Arguments:
        doubles -- mapping whose values are sequences of biography local
            ids; the keys are not used. Defaults to the module-level
            ``doubles`` as bound when this function was defined.
        repo -- object providing ``get_biography(local_id=...)`` and
            ``identify(p1, p2)``. Defaults to the module-level ``repo``
            as bound when this function was defined.

    Progress is printed to standard output. Groups that contain no ids
    are skipped.
    """
    groups = doubles.values()
    total = len(groups)
    for count, local_ids in enumerate(groups, 1):
        # A single pre-formatted string prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('%s of %s : %s' % (count, total, local_ids))

        # find the biography for each local id in the group
        bios = [repo.get_biography(local_id=local_id) for local_id in local_ids]

        # find the person for each biography
        persons = [bio.get_person() for bio in bios]
        if not persons:
            # Nothing to identify; indexing persons[0] would raise
            # IndexError and abort the remaining groups.
            continue

        # identify the first person with every other person of the group
        first = persons[0]
        for other in persons[1:]:
            print('identifying %s %s' % (first, other))
            repo.identify(first, other)
def doubles_in_suggestions_list(doubles=doubles, repo=repo):
    """Add each group of duplicate biographies to the similarity cache.

    For every group in ``doubles`` the biographies are looked up by local
    id, the person of each biography is fetched, and the first person of
    the group is paired with each of the remaining persons: their
    ``bioport_id`` values are passed to
    ``repo.db.add_to_similarity_cache`` with a score of 1.0.

    Arguments:
        doubles -- mapping whose values are sequences of biography local
            ids; the keys are not used. Defaults to the module-level
            ``doubles`` as bound when this function was defined.
        repo -- object providing ``get_biography(local_id=...)`` and a
            ``db`` attribute with ``add_to_similarity_cache(id1, id2,
            score)``. Defaults to the module-level ``repo`` as bound
            when this function was defined.

    Progress is printed to standard output. Groups that contain no ids
    are skipped.
    """
    groups = doubles.values()
    total = len(groups)
    for count, local_ids in enumerate(groups, 1):
        # A single pre-formatted string prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('%s of %s : %s' % (count, total, local_ids))

        # find the biography for each local id in the group
        bios = [repo.get_biography(local_id=local_id) for local_id in local_ids]

        # find the person for each biography
        persons = [bio.get_person() for bio in bios]
        if not persons:
            # Nothing to pair up; indexing persons[0] would raise
            # IndexError and abort the remaining groups.
            continue

        # add (first, other) pairs to the similarity cache
        first = persons[0]
        for other in persons[1:]:
            print('add to similairty cache %s %s' % (first, other))
            repo.db.add_to_similarity_cache(
                first.bioport_id, other.bioport_id, 1.0)