def groups_of_faces_by_distance_threshold():
    """Group faces near a name's embedding into fixed-width score buckets.

    Looks up the embedding for 'Wolf Blitzer', asks ``knn`` for all faces
    with ``max_threshold=max_thresh``, and splits the returned
    ``(face_id, score)`` pairs into consecutive ``[t, t + increment)``
    ranges. For each non-empty range, up to ``max_results_per_group``
    randomly sampled faces are queried and rendered.

    Returns:
        The value of ``group_results`` applied to one ``(label, results)``
        pair per bucket that produced results, in ascending range order.

    Raises:
        Exception: If no bucket produced any results.
    """
    from esper.embed_google_images import name_to_embedding
    from esper.face_embeddings import knn

    emb = name_to_embedding('Wolf Blitzer')

    # Lower bound of the first bucket. This name was previously read without
    # ever being assigned in this function.
    min_thresh = 0.0
    increment = 0.05
    max_thresh = 1.0
    max_results_per_group = 50
    exclude_labeled = False

    face_qs = UnlabeledFace.objects if exclude_labeled else Face.objects

    face_sims = knn(targets=[emb], max_threshold=max_thresh)

    results_by_bucket = {}
    for t in frange(min_thresh, max_thresh, increment):
        upper = t + increment
        face_ids = [face_id for face_id, score in face_sims if t <= score < upper]
        if not face_ids:
            continue

        # Sample before hitting the database so the id__in list stays small.
        sampled_ids = random.sample(
            face_ids, k=min(len(face_ids), max_results_per_group))
        # distinct() on frame__video: intended to show at most one face per
        # video -- NOTE(review): relies on the backend supporting DISTINCT ON.
        faces = face_qs.filter(id__in=sampled_ids).distinct('frame__video')
        if faces.count() == 0:
            continue

        results = qs_to_result(faces, limit=max_results_per_group,
                               custom_order_by_id=face_ids)
        # The count in the key is the full bucket size, not the sample size.
        results_by_bucket[(t, upper, len(face_ids))] = results

    if not results_by_bucket:
        raise Exception('No results to show')

    agg_results = [
        ('in range=({:0.2f}, {:0.2f}), count={}'.format(*key),
         results_by_bucket[key])
        for key in sorted(results_by_bucket)
    ]
    return group_results(agg_results)
def face_search():
    """Show every tenth face returned by ``knn`` for 'Wolf Blitzer'.

    The embedding for the name is used as the single ``knn`` target with
    ``max_threshold=0.4``; the matching face ids are thinned to every tenth
    entry and rendered in that order.

    Returns:
        The value of ``qs_to_result`` for the selected faces.
    """
    from esper.embed_google_images import name_to_embedding
    from esper.face_embeddings import knn

    query_embedding = name_to_embedding('Wolf Blitzer')
    neighbors = knn(targets=[query_embedding], max_threshold=0.4)

    matched_ids = [face_id for face_id, _ in neighbors]
    # Keep only every tenth match to thin out the result set.
    face_ids = matched_ids[::10]

    selected_faces = Face.objects.filter(id__in=face_ids)
    return qs_to_result(
        selected_faces,
        custom_order_by_id=face_ids,
        limit=len(face_ids))
def face_search_by_embeddings(embs, increment=0.05, max_thresh=1.2, exclude_labeled=False):
    """Bucket the ``knn`` matches for ``embs`` by score.

    Args:
        embs: Passed to ``knn`` as ``targets``.
        increment: Width of each score bucket.
        max_thresh: Passed to ``knn`` as ``max_threshold``; scores at or
            above it are dropped.
        exclude_labeled: Accepted but not used by this function.

    Returns:
        A pair ``(results_by_bucket, face_ids_to_score)`` where
        ``results_by_bucket`` maps ``(lower, upper)`` bounds to the list of
        face ids that fell in that range, and ``face_ids_to_score`` maps
        each kept face id to its score.

    Raises:
        Exception: If no face landed in any bucket.
    """
    min_thresh = 0.
    neighbors = knn(targets=embs, max_threshold=max_thresh)

    face_ids_to_score = {}
    results_by_bucket = {}
    for face_id, score in neighbors:
        # Ignore anything outside the half-open range [min_thresh, max_thresh).
        if not (min_thresh <= score < max_thresh):
            continue
        face_ids_to_score[face_id] = score

        # Snap the score down to the lower bound of its bucket.
        lower = min_thresh + int((score - min_thresh) / increment) * increment
        bucket = (lower, lower + increment)
        results_by_bucket.setdefault(bucket, []).append(face_id)

    if not results_by_bucket:
        raise Exception('No results to show')

    return results_by_bucket, face_ids_to_score
def face_search_with_exclusion():
    """Search faces for 'Wolf Blitzer', dropping those near known bad faces.

    Faces returned by ``knn`` for the name's embedding are split into a kept
    set and an excluded set; a face is excluded when ``knn`` also returns it
    for any of the hard-coded exclusion face ids at threshold 0.4. Only
    faces whose shot is not in a commercial are shown.

    Returns:
        With ``show_excluded`` off (the default here), the value of
        ``qs_to_result`` for the kept faces. With it on, the value of
        ``group_results`` for the excluded and kept groups.
    """
    from esper.embed_google_images import name_to_embedding
    from esper.face_embeddings import knn

    def exclude_faces(face_ids, exclude_ids, exclude_thresh):
        """Return ``(kept, excluded)`` sets of ``face_ids``."""
        near_excluded = set()
        for exclude_id in exclude_ids:
            neighbors = knn(ids=[exclude_id], max_threshold=exclude_thresh)
            near_excluded.update([face_id for face_id, _ in neighbors])
        candidates = set(face_ids)
        return candidates - near_excluded, candidates & near_excluded

    # Some params
    exclude_labeled = False
    show_excluded = False

    if exclude_labeled:
        face_qs = UnlabeledFace.objects
    else:
        face_qs = Face.objects

    name = 'Wolf Blitzer'
    emb = name_to_embedding(name)

    # NOTE(review): this call passes features=emb while the other searches
    # in this file pass targets=[emb] -- confirm against knn's signature.
    face_ids = [face_id for face_id, _ in knn(features=emb, max_threshold=0.6)]

    faces_to_exclude = [
        1634585, 531076, 3273872, 2586010, 921211,
        3176879, 3344886, 3660089, 249499, 2236580,
    ]
    kept_ids, excluded_ids = exclude_faces(face_ids, faces_to_exclude, 0.4)

    kept_faces = face_qs.filter(id__in=kept_ids, shot__in_commercial=False)

    if not show_excluded:
        # Show all of the faces that were kept
        return qs_to_result(kept_faces, custom_order_by_id=face_ids,
                            limit=len(face_ids))

    # Show the furthest faces that we kept and the faces that were excluded
    kept_results = qs_to_result(kept_faces,
                                custom_order_by_id=face_ids[::-1])
    excluded_faces = face_qs.filter(id__in=excluded_ids,
                                    shot__in_commercial=False)
    excluded_results = qs_to_result(excluded_faces)
    return group_results([('excluded', excluded_results),
                          (name, kept_results)])
def exclude_faces(face_ids, exclude_ids, exclude_thresh):
    """Split ``face_ids`` by whether ``knn`` returns them for an excluded id.

    Args:
        face_ids: Iterable of candidate face ids.
        exclude_ids: Face ids to query ``knn`` with, one call per id.
        exclude_thresh: Passed to ``knn`` as ``max_threshold``.

    Returns:
        A pair of sets ``(kept, excluded)``: the candidates that no ``knn``
        call returned, and the candidates that at least one call returned.
    """
    near_excluded = {
        neighbor_id
        for exclude_id in exclude_ids
        for neighbor_id, _ in knn(ids=[exclude_id], max_threshold=exclude_thresh)
    }
    candidates = set(face_ids)
    kept = candidates - near_excluded
    excluded = candidates & near_excluded
    return kept, excluded
def face_search_by_id():
    """Group faces near a set of hand-picked target faces into score buckets.

    ``knn`` is queried with the target face ids and ``max_threshold=max_thresh``.
    The returned ``(face_id, score)`` pairs are cut into consecutive ranges
    of width ``increment``; for each non-empty range up to
    ``max_results_per_group`` randomly sampled faces are queried and rendered.

    Returns:
        The value of ``group_results`` applied to one ``(label, results)``
        pair per bucket that produced results, in ascending range order.

    Raises:
        Exception: If no bucket produced any results.
    """
    # Wolf Blitzer
    # target_face_ids = [975965, 5254043, 844004, 105093, 3801699, 4440669, 265071]
    # not_target_face_ids = [
    #     1039037, 3132700, 3584906, 2057919, 3642645, 249473, 129685, 2569834, 5366608,
    #     4831099, 2172821, 1981350, 1095709, 4427683, 1762835]

    # Melania Trump
    # target_face_ids = [
    #     2869846, 3851770, 3567361, 401073, 3943919, 5245641, 198592, 5460319, 5056617,
    #     1663045, 3794909, 1916340, 1373079, 2698088, 414847, 4608072]
    # not_target_face_ids = []

    # Bernie Sanders
    target_face_ids = [
        644710, 4686364, 2678025, 62032, 13248, 4846879, 4804861, 561270, 2651257,
        2083010, 2117202, 1848221, 2495606, 4465870, 3801638, 865102, 3861979, 4146727,
        3358820, 2087225, 1032403, 1137346, 2220864, 5384396, 3885087, 5107580, 2856632,
        335131, 4371949, 533850, 5384760, 3335516]
    # Curated negatives; not consulted by the search below.
    not_target_face_ids = [
        2656438, 1410140, 4568590, 2646929, 1521533, 1212395, 178315, 1755096, 3476158,
        3310952, 1168204, 3062342, 1010748, 1275607, 2190958, 2779945, 415610, 1744917,
        5210138, 3288162, 5137166, 4169061, 3774070, 2595170, 382055, 2365443, 712023,
        5214225, 178251, 1039121, 5336597, 525714, 4522167, 3613622, 5161408, 2091095,
        741985, 521, 2589969, 5120596, 284825, 3361576, 1684384, 4437468, 5214225, 178251]

    from esper.face_embeddings import knn

    # Lower bound of the first bucket. This name was previously read without
    # ever being assigned in this function.
    min_thresh = 0.0
    increment = 0.05
    max_thresh = 1.0
    max_results_per_group = 50
    exclude_labeled = False

    face_qs = UnlabeledFace.objects if exclude_labeled else Face.objects

    face_sims = knn(ids=target_face_ids, max_threshold=max_thresh)

    results_by_bucket = {}
    idx = 0
    max_idx = len(face_sims)
    for t in frange(min_thresh, max_thresh, increment):
        # Advance a single cursor through face_sims to carve out this bucket.
        # NOTE(review): assumes knn returns pairs sorted by ascending score.
        start_idx = idx
        cur_thresh = t + increment
        while idx < max_idx and face_sims[idx][1] < cur_thresh:
            idx += 1

        face_ids = [face_id for face_id, _ in face_sims[start_idx:idx]]
        if not face_ids:
            continue

        # Sample before hitting the database so the id__in list stays small.
        sampled_ids = random.sample(
            face_ids, k=min(len(face_ids), max_results_per_group))
        # distinct() on frame__video: intended to show at most one face per
        # video -- NOTE(review): relies on the backend supporting DISTINCT ON.
        faces = face_qs.filter(id__in=sampled_ids).distinct('frame__video')
        if faces.count() == 0:
            continue

        results = qs_to_result(faces, limit=max_results_per_group,
                               custom_order_by_id=face_ids)
        # The count in the key is the full bucket size, not the sample size.
        results_by_bucket[(t, cur_thresh, len(face_ids))] = results

    if not results_by_bucket:
        raise Exception('No results to show')

    agg_results = [
        ('in range=({:0.2f}, {:0.2f}), count={}'.format(*key),
         results_by_bucket[key])
        for key in sorted(results_by_bucket)
    ]
    return group_results(agg_results)