Example #1
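Clusters one user's detected faces: it loads a pickled matrix of pairwise CNN face-similarity scores, normalizes it into a [0, 1] affinity matrix, runs spectral clustering, keeps clusters whose mean pairwise similarity exceeds 0.4 as individual people, and pickles the face groups together with the people ranked by how often they appear.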
import cPickle

import numpy as np
from sklearn.cluster import SpectralClustering


def cluster_faces(name, img_list='all-scores-faces-list-new'):
    # root_all and create_face_group_html are defined elsewhere in the project.
    root = root_all + 'face_recognition/' + '@'.join(name.split('-'))
    cnn_root = root_all + 'face_recognition_CNN/' + name + '/'

    # Pairwise CNN face-similarity scores, pickled as a square matrix.
    with open(cnn_root + 'waldo_normalized_combined.cPickle', 'rb') as f:
        combined_matrix = cPickle.load(f)

    # Cosine-style normalization: A[i, j] / sqrt(A[i, i] * A[j, j]),
    # then min-max rescale into [0, 1].
    diag = np.diag(combined_matrix)[:, np.newaxis]
    normalize_matrix = np.sqrt(np.dot(diag, diag.T))
    affinity_matrix = np.divide(combined_matrix, normalize_matrix)
    min_ = np.min(affinity_matrix)
    max_ = np.max(affinity_matrix)
    affinity_matrix = (affinity_matrix - min_) / (max_ - min_)

    # n_neighbors is ignored when affinity='precomputed'.
    f = SpectralClustering(affinity='precomputed',
                           n_clusters=min(8, affinity_matrix.shape[0] - 1),
                           eigen_solver='arpack',
                           n_neighbors=min(5, affinity_matrix.shape[0]))
    a = f.fit_predict(affinity_matrix)

    # Group face indices by cluster label.
    groups = {}
    for idx, label in enumerate(a):
        groups.setdefault(label, []).append(idx)
    # Keep clusters whose mean pairwise similarity is high enough to be one person.
    unique_person_id = []
    for kk in groups:
        min_similarity = np.inf
        max_similarity = -np.inf
        mean_similarity = 0.0
        this_group_ids = groups[kk]
        for j in xrange(len(this_group_ids)):
            for i in xrange(j + 1, len(this_group_ids)):
                temp = combined_matrix[this_group_ids[i], this_group_ids[j]]
                min_similarity = min(min_similarity, temp)
                max_similarity = max(max_similarity, temp)
                mean_similarity += temp
        mean_similarity /= max(1, len(this_group_ids) * (len(this_group_ids) - 1) // 2)
        print len(this_group_ids), mean_similarity, max_similarity, min_similarity
        if mean_similarity > 0.4 and len(this_group_ids) > 1:
            unique_person_id.append(kk)
    # Rank the kept clusters by size, most photographed person first.
    important_person = [[i, len(groups[i])] for i in unique_person_id]
    important_person.sort(key=lambda x: x[1], reverse=True)
    # Read the face-image filenames, in the same order as the matrix rows.
    in_path = root + '-dir/' + img_list
    imgs_list = []
    with open(in_path, 'r') as data:
        for line in data:
            imgs_list.append(line.rstrip('\n').split('/')[-1])

    # Map each image filename to its cluster.
    face_groups = {}
    for label, img in zip(a, imgs_list):
        face_groups.setdefault(label, []).append(img)

    create_face_group_html(name, face_groups, important_person)

    with open(cnn_root + 'waldo_group_combined.cPickle', 'wb') as f:
        cPickle.dump([face_groups, important_person], f)
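The diagonal trick above is cosine-style normalization of a similarity matrix: every score is divided by the geometric mean of the two self-similarities, which puts 1.0 on the diagonal, and the result is then min-max rescaled to [0, 1] before clustering. A minimal standalone sketch; the 3x3 values are made up for illustration:

import numpy as np

# Toy symmetric similarity matrix with self-similarities on the diagonal.
S = np.array([[4.0, 2.0, 0.5],
              [2.0, 9.0, 1.0],
              [0.5, 1.0, 1.0]])

d = np.diag(S)[:, np.newaxis]
A = S / np.sqrt(np.dot(d, d.T))          # A[i, j] = S[i, j] / sqrt(S[i, i] * S[j, j])
A = (A - A.min()) / (A.max() - A.min())  # min-max rescale to [0, 1]
print A                                  # diagonal was exactly 1.0 before rescaling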
Example #2
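A revised pipeline for the same task: it loads the pairwise scores from a MATLAB .mat file, over-segments with spectral clustering (up to 30 clusters), greedily re-merges the two clusters with the highest median cross-cluster similarity for as long as the merged group stays coherent, and pickles the final groups and the ranked people.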
import cPickle

import numpy as np
import scipy.io as sio
from sklearn.cluster import SpectralClustering


def cluster_faces_new(name='9_68821308@N00', img_list='all-scores-faces-list'):
    # root, dump_low_mean_, cal_median_distance, cal_median_distance_ingroup,
    # create_retrieval_image and create_face_group_html are defined elsewhere
    # in the project.
    in_path = root + name + '-dir/all-scores-faces-list-new-pw.mat'
    # Fall back to the older pairwise-score file if the new one is missing.
    try:
        temp = sio.loadmat(in_path)
    except IOError:
        in_path = root + name + '-dir/all-scores-faces-list-pw.mat'
        temp = sio.loadmat(in_path)
    matrix = temp['matrix']
    
    # No faces at all: write out empty groups and stop.
    if len(matrix) == 0:
        out_root = root + name + '-dir/'
        with open(out_root + '_20_group.cPickle', 'wb') as f:
            cPickle.dump([{}, []], f)
        return

    # Alternative affinity constructions that were tried:
    # 1st method, exponential:
    #   a_std = np.std(matrix)
    #   beta = 1.0
    #   affinity_matrix = np.exp(beta * matrix / a_std)
    # 2nd method, sigmoid:
    #   affinity_matrix = 1 / (1 + np.exp(-matrix))
    # 3rd method, normalization (used below):

    # A single face: read its filename and write it out as a singleton group.
    if len(matrix) == 1:
        imgs_list = []
        with open(root + name + '-dir/' + img_list, 'r') as data:
            for line in data:
                imgs_list.append(line.rstrip('\n').split('/')[-1])
        out_root = root + name + '-dir/'
        with open(out_root + '_20_group.cPickle', 'wb') as f:
            cPickle.dump([{imgs_list[0]: 1}, []], f)
        return

    # Shift scores to be non-negative, then apply the same diagonal
    # normalization and [0, 1] rescaling as cluster_faces above.
    matrix_ori = matrix
    matrix = matrix - np.min(matrix)
    diag = np.diag(matrix)[:, np.newaxis]
    normalize_matrix = np.sqrt(np.dot(diag, diag.T))
    affinity_matrix = np.divide(matrix, normalize_matrix)
    min_ = np.min(affinity_matrix)
    max_ = np.max(affinity_matrix)
    affinity_matrix = (affinity_matrix - min_) / (max_ - min_)

    # Deliberately over-segment; clusters are re-merged below.
    # n_neighbors is ignored when affinity='precomputed'.
    f = SpectralClustering(affinity='precomputed',
                           n_clusters=min(30, affinity_matrix.shape[0] // 2),
                           eigen_solver='arpack',
                           n_neighbors=min(10, affinity_matrix.shape[0]))

    a = f.fit_predict(affinity_matrix)
    mean_similarities = {}

    # Group face indices by cluster label; every cluster starts out unmerged.
    groups_notyet = {}
    for idx, label in enumerate(a):
        groups_notyet.setdefault(label, []).append(idx)

    groups_already = {}
    dump_low_mean_(matrix_ori, groups_already, groups_notyet)

    # Greedy agglomeration: repeatedly pick the two clusters with the highest
    # median cross-cluster similarity and merge them, unless the merged
    # cluster's own median similarity would drop too low.
    while len(groups_notyet) > 1:
        keys = groups_notyet.keys()
        max_ = [-1, -1]
        max_median = -np.inf
        for i in xrange(len(keys)):
            for j in xrange(i + 1, len(keys)):
                group_i = groups_notyet[keys[i]]
                group_j = groups_notyet[keys[j]]
                temp = cal_median_distance(matrix_ori, group_i, group_j)
                if temp > max_median:
                    max_median = temp
                    max_ = [keys[i], keys[j]]
        merged = groups_notyet[max_[0]] + groups_notyet[max_[1]]
        if cal_median_distance_ingroup(matrix_ori, merged) > -10:
            # Still coherent: keep the merged cluster under the first key.
            groups_notyet.pop(max_[1], None)
            groups_notyet[max_[0]] = merged
        else:
            # Merging would dilute both clusters: finalize them separately.
            groups_already[max_[0]] = groups_notyet.pop(max_[0])
            groups_already[max_[1]] = groups_notyet.pop(max_[1])
    # Whatever is left unmerged is final as well.
    for i in groups_notyet:
        groups_already[i] = groups_notyet[i]


    # Keep merged clusters with positive mean pairwise similarity as people.
    unique_person_id = []
    for kk in groups_already:
        this_group_ids = groups_already[kk]
        mean_similarity = 0.0
        for j in xrange(len(this_group_ids)):
            for i in xrange(j + 1, len(this_group_ids)):
                mean_similarity += matrix_ori[this_group_ids[i], this_group_ids[j]]
        mean_similarity /= max(1, len(this_group_ids) * (len(this_group_ids) - 1) // 2)
        mean_similarities[kk] = mean_similarity
        if mean_similarity > 0 and len(this_group_ids) > 1:
            unique_person_id.append(kk)
    # Rank people by group size, then read the image filenames.
    important_person = [[i, len(groups_already[i])] for i in unique_person_id]
    important_person.sort(key=lambda x: x[1], reverse=True)
    in_path = root + name + '-dir/' + img_list
    imgs_list = []
    with open(in_path, 'r') as data:
        for line in data:
            imgs_list.append(line.rstrip('\n').split('/')[-1])

    # Rebuild per-face labels from the merged groups, then map filenames.
    a = np.zeros(len(imgs_list), dtype=int)
    for label in groups_already:
        for idx in groups_already[label]:
            a[idx] = label
    face_groups = {}
    for label, img in zip(a, imgs_list):
        face_groups.setdefault(label, []).append(img)
    create_retrieval_image(name, matrix)
    create_face_group_html(name, face_groups, important_person, mean_similarities)
    out_root = root + name + '-dir/'
    with open(out_root + '_20_group.cPickle', 'wb') as f:
        cPickle.dump([face_groups, important_person], f)
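The helpers dump_low_mean_, cal_median_distance and cal_median_distance_ingroup are defined elsewhere in the project and are not shown on this page. Judging only from how the merge loop calls them, the two median helpers plausibly look like the following sketch; these definitions are an assumption, not the project's actual code:

import numpy as np

def cal_median_distance(matrix, group_i, group_j):
    # Assumed: median pairwise score between two clusters.
    scores = [matrix[a, b] for a in group_i for b in group_j]
    return np.median(scores)

def cal_median_distance_ingroup(matrix, group):
    # Assumed: median pairwise score within one (merged) cluster.
    scores = [matrix[group[i], group[j]]
              for j in xrange(len(group))
              for i in xrange(j + 1, len(group))]
    return np.median(scores) if scores else -np.inf

Under these assumed definitions, the `> -10` test in the merge loop checks that the combined cluster would still have an acceptable median self-similarity before committing to the merge.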