import cPickle

import numpy as np
import scipy.io as sio
from sklearn.cluster import SpectralClustering

# `root_all`, `root`, `create_face_group_html`, `create_retrieval_image` and
# `dump_low_mean_` are defined elsewhere in this module.


def cluster_faces(name, img_list='all-scores-faces-list-new'):
    root = root_all + 'face_recognition/' + '@'.join(name.split('-'))
    cnn_root = root_all + 'face_recognition_CNN/' + name + '/'
    with open(cnn_root + 'waldo_normalized_combined.cPickle', 'rb') as f:
        combined_matrix = cPickle.load(f)

    # Normalize each pairwise score by sqrt(s_ii * s_jj) using the
    # self-similarity diagonal, then rescale the result to [0, 1].
    diag = np.diag(combined_matrix)
    diag = diag[:, np.newaxis]
    normalize_matrix = np.sqrt(np.dot(diag, np.transpose(diag)))
    affinity_matrix = np.divide(combined_matrix, normalize_matrix)
    min_ = np.min(affinity_matrix)
    max_ = np.max(affinity_matrix)
    affinity_matrix = (affinity_matrix - min_) / (max_ - min_)

    # Spectral clustering on the precomputed affinity; the cluster count is
    # capped for small face sets (n_neighbors has no effect when the
    # affinity is precomputed).
    f = SpectralClustering(affinity='precomputed',
                           n_clusters=min(8, affinity_matrix.shape[0] - 1),
                           eigen_solver='arpack',
                           n_neighbors=min(5, affinity_matrix.shape[0]))
    a = f.fit_predict(affinity_matrix)

    # Group face indices by cluster label.
    groups = {}
    for i in zip(a, xrange(len(a))):
        if i[0] not in groups:
            groups[i[0]] = [i[1]]
        else:
            groups[i[0]].append(i[1])

    # Keep clusters whose mean pairwise similarity is high enough; these
    # are treated as distinct recurring people.
    unique_person_id = []
    for kk in groups:
        min_similarity = np.Inf
        max_similarity = -np.Inf
        mean_similarity = 0
        this_group_ids = groups[kk]
        for j in xrange(len(this_group_ids)):
            for i in xrange(j + 1, len(this_group_ids)):
                temp = combined_matrix[this_group_ids[i], this_group_ids[j]]
                if temp < min_similarity:
                    min_similarity = temp
                if temp > max_similarity:
                    max_similarity = temp
                mean_similarity += temp
        # max(1, ...) guards against singleton clusters with zero pairs.
        mean_similarity /= max(1, len(this_group_ids) * (len(this_group_ids) - 1) / 2)
        print len(this_group_ids), mean_similarity, max_similarity, min_similarity
        if mean_similarity > 0.4 and len(this_group_ids) > 1:
            unique_person_id.append(kk)

    # Rank the retained people by cluster size.
    important_person = [[i, len(groups[i])] for i in unique_person_id]
    important_person.sort(key=lambda x: x[1], reverse=True)

    # Map cluster labels back to image file names.
    in_path = root + '-dir/' + img_list
    imgs_list = []
    with open(in_path, 'r') as data:
        for line in data:
            imgs_list.append(line[:-1].split('/')[-1])
    face_groups = {}
    for i in zip(a, imgs_list):
        if i[0] not in face_groups:
            face_groups[i[0]] = [i[1]]
        else:
            face_groups[i[0]].append(i[1])

    create_face_group_html(name, face_groups, important_person)
    with open(cnn_root + 'waldo_group_combined.cPickle', 'wb') as f:
        cPickle.dump([face_groups, important_person], f)
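# ---------------------------------------------------------------------------
# cluster_faces_new below relies on helpers defined elsewhere in this
# project: dump_low_mean_, cal_median_distance and cal_median_distance_ingroup.
# The two sketches here are illustrative reconstructions inferred from their
# call sites (the median of the pairwise-score submatrix between, or within,
# groups of face indices); the original implementations may differ, and
# dump_low_mean_ is not reconstructed here.
# ---------------------------------------------------------------------------
def cal_median_distance(matrix, group_i, group_j):
    # Median pairwise score between two groups of face indices.
    return np.median(matrix[np.ix_(group_i, group_j)])


def cal_median_distance_ingroup(matrix, group):
    # Median pairwise score within a merged group, ignoring the diagonal.
    sub = matrix[np.ix_(group, group)]
    return np.median(sub[~np.eye(len(group), dtype=bool)])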
def cluster_faces_new(name='9_68821308@N00', img_list='all-scores-faces-list'):
    #cnn_root = root_all + 'face_recognition_CNN/' + ('-').join(name.split('@')) + '/'
    # Load the pairwise face-similarity matrix, falling back to the older
    # list if the "-new" variant is absent.
    in_path = root + name + '-dir/all-scores-faces-list-new-pw.mat'
    try:
        temp = sio.loadmat(in_path)
    except IOError:
        in_path = root + name + '-dir/all-scores-faces-list-pw.mat'
        temp = sio.loadmat(in_path)
    matrix = temp['matrix']

    out_root = root + name + '-dir/'
    if len(matrix) == 0:
        # No faces: write an empty grouping and stop.
        with open(out_root + '_20_group.cPickle', 'wb') as f:
            cPickle.dump([{}, []], f)
        return

    '''1st method: exponential'''
    #a_std = np.std(matrix)
    #beta = 1.0
    #affinity_matrix = np.exp(beta * matrix / a_std)
    '''2nd method: sigmoid'''
    #affinity_matrix = 1 / (1 + np.exp(-matrix))
    '''3rd method: normalize'''
    if len(matrix) == 1:
        # A single face: read the image list and emit a trivial one-member
        # grouping (assumes the intended key is that image's file name).
        imgs_list = []
        with open(root + name + '-dir/' + img_list, 'r') as data:
            for line in data:
                imgs_list.append(line[:-1].split('/')[-1])
        with open(out_root + '_20_group.cPickle', 'wb') as f:
            cPickle.dump([{imgs_list[0]: 1}, []], f)
        return

    # Shift scores to be non-negative, then normalize by sqrt(s_ii * s_jj)
    # and rescale to [0, 1], as in cluster_faces above.
    matrix_ori = matrix
    matrix = matrix - np.min(matrix)
    diag = np.diag(matrix)
    diag = diag[:, np.newaxis]
    normalize_matrix = np.sqrt(np.dot(diag, np.transpose(diag)))
    affinity_matrix = np.divide(matrix, normalize_matrix)
    min_ = np.min(affinity_matrix)
    max_ = np.max(affinity_matrix)
    affinity_matrix = (affinity_matrix - min_) / (max_ - min_)

    f = SpectralClustering(affinity='precomputed',
                           n_clusters=min(30, affinity_matrix.shape[0] / 2),
                           eigen_solver='arpack',
                           n_neighbors=min(10, affinity_matrix.shape[0]))
    a = f.fit_predict(affinity_matrix)

    # Group face indices by cluster label; low-mean groups are moved
    # straight to groups_already by dump_low_mean_.
    mean_similarities = {}
    groups_notyet = {}
    for i in zip(a, xrange(len(a))):
        if i[0] not in groups_notyet:
            groups_notyet[i[0]] = [i[1]]
        else:
            groups_notyet[i[0]].append(i[1])
    groups_already = {}
    dump_low_mean_(matrix_ori, groups_already, groups_notyet)

    # Agglomerative pass: repeatedly pick the pair of clusters with the
    # highest median cross-group score and merge them, unless the merged
    # group's in-group median falls below the (loose) -10 threshold, in
    # which case both clusters are frozen as final.
    while len(groups_notyet) > 1:
        keys = groups_notyet.keys()
        max_ = [-1, -1]
        max_median = -np.Inf
        for i in xrange(len(keys)):
            for j in xrange(i + 1, len(keys)):
                group_i = groups_notyet[keys[i]]
                group_j = groups_notyet[keys[j]]
                temp = cal_median_distance(matrix_ori, group_i, group_j)
                if temp > max_median:
                    max_median = temp
                    max_ = [keys[i], keys[j]]
        merged = groups_notyet[max_[0]] + groups_notyet[max_[1]]
        if cal_median_distance_ingroup(matrix_ori, merged) > -10:
            groups_notyet.pop(max_[1], None)
            groups_notyet[max_[0]] = merged
        else:
            groups_already[max_[0]] = groups_notyet.pop(max_[0])
            groups_already[max_[1]] = groups_notyet.pop(max_[1])
    for i in groups_notyet:
        groups_already[i] = groups_notyet[i]

    # Score each final group by its mean pairwise similarity (an
    # alternative median-based score is kept commented out below).
    unique_person_id = []
    for kk in groups_already:
        min_similarity = np.Inf
        max_similarity = -np.Inf
        mean_similarity = 0
        #median_similarity = []
        this_group_ids = groups_already[kk]
        for j in xrange(len(this_group_ids)):
            for i in xrange(j + 1, len(this_group_ids)):
                temp = matrix_ori[this_group_ids[i], this_group_ids[j]]
                if temp < min_similarity:
                    min_similarity = temp
                if temp > max_similarity:
                    max_similarity = temp
                #mean_similarity += np.log10(temp)
                mean_similarity += temp
                #median_similarity.append(temp)
        mean_similarity /= max(1, len(this_group_ids) * (len(this_group_ids) - 1) / 2)
        mean_similarities[kk] = mean_similarity
        #mean_similarities[kk] = np.median(np.array(median_similarity)) if median_similarity else 0
        if mean_similarity > 0 and len(this_group_ids) > 1:
            unique_person_id.append(kk)

    important_person = [[i, len(groups_already[i])] for i in unique_person_id]
    important_person.sort(key=lambda x: x[1], reverse=True)

    # Map face indices back to image file names via the final assignment.
    in_path = root + name + '-dir/' + img_list
    imgs_list = []
    with open(in_path, 'r') as data:
        for line in data:
            imgs_list.append(line[:-1].split('/')[-1])
    a = np.zeros(len(imgs_list))
    for i in groups_already:
        for j in groups_already[i]:
            a[j] = i
    face_groups = {}
    for i in zip(a, imgs_list):
        if i[0] not in face_groups:
            face_groups[i[0]] = [i[1]]
        else:
            face_groups[i[0]].append(i[1])

    create_retrieval_image(name, matrix)
    create_face_group_html(name, face_groups, important_person, mean_similarities)
    with open(out_root + '_20_group.cPickle', 'wb') as f:
        cPickle.dump([face_groups, important_person], f)
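# Example invocation (a minimal sketch: assumes the module-level `root` path
# points at the per-user face-recognition directories described above).
if __name__ == '__main__':
    cluster_faces_new(name='9_68821308@N00', img_list='all-scores-faces-list')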