Code example #1
 def add_one_new_pic(self, pic_path, label):
     current_day = get_current_day()
     log_file = open(os.path.join(log_dir, current_day + '.txt'), 'a')
     try:
         # The data has already been resized to the required size when it is read in
         result = self.extract_pic_feature(pic_path)
         if result is None:
             return False
         face_pic, pic_feature = result
         self.add_one_pic(pic_feature, label)
         pic_name = os.path.split(pic_path)[1]
         this_person_pic_folder = os.path.join(self.all_pic_data_folder,
                                               label)
         this_person_feature_folder = os.path.join(
             self.all_pic_feature_data_folder, label)
         if not os.path.exists(this_person_pic_folder):
             os.makedirs(this_person_pic_folder)
         if not os.path.exists(this_person_feature_folder):
             os.makedirs(this_person_feature_folder)
         # Store the feature for this picture directly, and also save the image file
         this_pic_feature_name = os.path.join(this_person_feature_folder,
                                              pic_name + '.p')
         msgpack_numpy.dump(pic_feature, open(this_pic_feature_name, 'wb'))
         this_pic_face_name = os.path.join(this_person_pic_folder,
                                           pic_name + '.jpg')
         cv2.imwrite(this_pic_face_name, face_pic)
         log_file.write(
             '\t'.join(map(str, [pic_path, this_pic_face_name])) + '\n')
         return True
     except:
         traceback.print_exc()
         return False
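
The get_current_day helper used above for naming the daily log file is not shown in this snippet; a minimal sketch of what it might look like, assuming the log files are simply named by date:

import time


def get_current_day():
    # Hypothetical implementation: return the current date as a string
    # (e.g. '20240131') to be used as the daily log file name.
    return time.strftime('%Y%m%d')
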
Code example #2
def feature_trans_pca(src_pack_file, dst_pack_file):
    all_data = []

    person_feature_dic = msgpack_numpy.load(open(src_pack_file, 'rb'))
    for person_index, person in enumerate(person_feature_dic):
        feature_list = person_feature_dic.get(person)
        for index in range(len(feature_list)):
            try:
                if feature_list[index][1] is None:
                    continue
                this_feature = np.array(feature_list[index][1][0])
                all_data.append(this_feature)
            except:
                traceback.print_exc()
    all_data = np.asarray(all_data)
    pca = PCA(n_components=128)
    pca.fit(all_data)

    for person_index, person in enumerate(person_feature_dic):
        feature_list = person_feature_dic.get(person)
        for index in range(len(feature_list)):
            try:
                if feature_list[index][1] is None:
                    continue
                this_feature = np.array(feature_list[index][1][0])
                this_feature = np.reshape(this_feature, (1, this_feature.size))
                this_feature = pca.transform(this_feature)[0]
                feature_list[index][1][0] = this_feature
            except:
                traceback.print_exc()
    msgpack_numpy.dump(person_feature_dic, open(dst_pack_file, 'wb'))
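
For reference, a self-contained sketch of the PCA step that feature_trans_pca applies, with random data standing in for the stacked face features (the 128-component setting follows the code above):

import numpy as np
from sklearn.decomposition import PCA

# Random stand-in for the stacked features: 1000 samples of dimension 256.
all_data = np.random.rand(1000, 256)

pca = PCA(n_components=128)
pca.fit(all_data)

# Transform a single feature vector the same way the second loop above does.
one_feature = np.random.rand(256)
reduced = pca.transform(np.reshape(one_feature, (1, one_feature.size)))[0]
print(reduced.shape)  # (128,)
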
Code example #3
def create_sample_list(root_folder, train_valid_sample_list_file,
                       verif_sample_list_file):
    person_list = os.listdir(root_folder)
    train_valid_sample_list = []
    verif_sample_list = []
    # Hold out 2000 people for the face verification model
    for person_index, person_name in enumerate(person_list[:-2000]):
        person_path = os.path.join(root_folder, person_name)
        pic_list = map(
            lambda y: (y, person_index),
            map(lambda x: os.path.join(person_path, x),
                os.listdir(person_path)))
        train_valid_sample_list.extend(pic_list)
    np.random.shuffle(train_valid_sample_list)
    train_num = int(len(train_valid_sample_list) * 0.8)
    train_sample_list = train_valid_sample_list[:train_num]
    valid_sample_list = train_valid_sample_list[train_num:]
    msgpack_numpy.dump((train_sample_list, valid_sample_list),
                       open(train_valid_sample_list_file, 'wb'))
    for person_index, person_name in enumerate(person_list[-2000:]):
        person_path = os.path.join(root_folder, person_name)
        pic_list = map(
            lambda y: (y, person_index),
            map(lambda x: os.path.join(person_path, x),
                os.listdir(person_path)))
        verif_sample_list.extend(pic_list)
    msgpack_numpy.dump(verif_sample_list, open(verif_sample_list_file, 'wb'))
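
The dumped sample lists can be read back later with msgpack_numpy.load; a minimal sketch, where the file name is only a placeholder for whatever was passed as train_valid_sample_list_file:

import msgpack_numpy

train_sample_list, valid_sample_list = msgpack_numpy.load(
    open('train_valid_sample_list.p', 'rb'))  # placeholder path
print(len(train_sample_list))
print(len(valid_sample_list))
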
Code example #4
def create_sample_list_batch_shuffle(root_folder,
                                     train_valid_sample_list_file):
    person_list = os.listdir(root_folder)
    train_sample_list = []
    valid_sample_list = []
    # Each person has 29 pictures on average, giving at most 565.5 positive pairs, so shuffle once every 565 people; during training, 16385 pictures are read in at a time
    # Used to train pair-based models such as FaceNet
    batch_train_valid_sample_list = []
    for person_index, person_name in enumerate(person_list[:-2000]):
        person_path = os.path.join(root_folder, person_name)
        pic_list = map(
            lambda y: (y, person_index),
            map(lambda x: os.path.join(person_path, x),
                os.listdir(person_path)))
        if person_index > 0 and person_index % 565 == 0:
            np.random.shuffle(batch_train_valid_sample_list)
            train_num = int(len(batch_train_valid_sample_list) * 0.8)
            batch_train_sample_list = batch_train_valid_sample_list[:train_num]
            batch_valid_sample_list = batch_train_valid_sample_list[train_num:]
            train_sample_list.extend(batch_train_sample_list)
            valid_sample_list.extend(batch_valid_sample_list)
            batch_train_valid_sample_list = []
        # Always collect the current person's pictures, including at shuffle boundaries
        batch_train_valid_sample_list.extend(pic_list)
    np.random.shuffle(batch_train_valid_sample_list)
    train_num = int(len(batch_train_valid_sample_list) * 0.8)
    batch_train_sample_list = batch_train_valid_sample_list[:train_num]
    batch_valid_sample_list = batch_train_valid_sample_list[train_num:]
    train_sample_list.extend(batch_train_sample_list)
    valid_sample_list.extend(batch_valid_sample_list)

    msgpack_numpy.dump((train_sample_list, valid_sample_list),
                       open(train_valid_sample_list_file, 'wb'))
Code example #5
def main_distance():
    all_data = []
    all_label = []
    all_pic_path_list = []
    count = 0
    verif_path_feature_dic = msgpack_numpy.load(open(feature_pack_file, 'rb'))
    for line in open(pair_file):
        if count % 100 == 0:
            print count
        count += 1
        tmp = line.rstrip().split()
        if len(tmp) == 3:
            path1 = tmp[0]
            path2 = tmp[1]
            label = int(tmp[2])
            feature1 = verif_path_feature_dic.get(path1)
            feature2 = verif_path_feature_dic.get(path2)
            # pdb.set_trace()
            # predicts = pw.cosine_similarity(feature1, feature2)
            predicts = np.fabs(feature1-feature2)
            all_data.append(predicts)
            all_label.append(label)
            all_pic_path_list.append((path1, path2))

    data = np.asarray(all_data)
    # print data.shape
    # data = np.reshape(data, newshape=(data.shape[0], 1))
    data = np.reshape(data, newshape=(data.shape[0], data.shape[2]))
    label = np.asarray(all_label)
    print data.shape, label.shape
    msgpack_numpy.dump((data, label, all_pic_path_list), open('orl_verif_fc7_finetune_fc8.p', 'wb'))
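
The (data, label) pack written above can then be fed to a simple verification classifier; a hedged sketch using LinearSVC and a plain 80/20 split (mirroring, but not reproducing, the later training code):

import msgpack_numpy
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC

data, label, all_pic_path_list = msgpack_numpy.load(
    open('orl_verif_fc7_finetune_fc8.p', 'rb'))
data = np.asarray(data)
label = np.asarray(label)

# Simple 80/20 split over shuffled indices.
indices = np.random.permutation(len(label))
train_num = int(len(label) * 0.8)
train_idx, valid_idx = indices[:train_num], indices[train_num:]

clf = LinearSVC()
clf.fit(data[train_idx], label[train_idx])
print(accuracy_score(label[valid_idx], clf.predict(data[valid_idx])))
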
Code example #6
def extract_triplet_feature():
    lfw_feature_dic = msgpack_numpy.load(open(feature_pack_file, 'rb'))
    new_lfw_feature_dic = {}
    model_file = '/data/liubo/face/vgg_face_model/annotate_siamese_graph.model'
    weight_file = '/data/liubo/face/vgg_face_model/annotate_siamese_graph.weight'
    model = model_from_json(open(model_file, 'r').read())
    opt = Adam()
    model.compile(optimizer=opt, loss=['categorical_crossentropy'])
    model.load_weights(weight_file)

    # pdb.set_trace()
    get_Conv_FeatureMap = K.function(
        [model.layers[2].layers[0].get_input_at(False),
         K.learning_phase()],
        [model.layers[2].layers[-1].get_output_at(False)])
    for person in lfw_feature_dic:
        # print person
        this_person_feature_list = lfw_feature_dic.get(person)
        this_person_triplet_feature_list = []
        for feature, path in this_person_feature_list:
            feature = np.reshape(feature, (1, feature.size))
            new_feature = get_Conv_FeatureMap([feature, 0])[0].copy()
            this_person_triplet_feature_list.append((new_feature, path))

        new_lfw_feature_dic[person] = this_person_triplet_feature_list
    msgpack_numpy.dump(new_lfw_feature_dic,
                       open(triplet_feature_pack_file, 'wb'))
Code example #7
def create_sample_list_batch_shuffle(root_folder, train_valid_sample_list_file):
    person_list = os.listdir(root_folder)
    train_sample_list = []
    valid_sample_list = []
    # Each person has 29 pictures on average, giving at most 565.5 positive pairs, so shuffle once every 565 people; during training, 16385 pictures are read in at a time
    # Used to train pair-based models such as FaceNet
    batch_train_valid_sample_list = []
    for person_index, person_name in enumerate(person_list[:-2000]):
        person_path = os.path.join(root_folder, person_name)
        pic_list = map(lambda y: (y, person_index), map(lambda x: os.path.join(person_path, x), os.listdir(person_path)))
        if person_index > 0 and person_index % 565 == 0:
            np.random.shuffle(batch_train_valid_sample_list)
            train_num = int(len(batch_train_valid_sample_list) * 0.8)
            batch_train_sample_list = batch_train_valid_sample_list[:train_num]
            batch_valid_sample_list = batch_train_valid_sample_list[train_num:]
            train_sample_list.extend(batch_train_sample_list)
            valid_sample_list.extend(batch_valid_sample_list)
            batch_train_valid_sample_list = []
        # Always collect the current person's pictures, including at shuffle boundaries
        batch_train_valid_sample_list.extend(pic_list)
    np.random.shuffle(batch_train_valid_sample_list)
    train_num = int(len(batch_train_valid_sample_list) * 0.8)
    batch_train_sample_list = batch_train_valid_sample_list[:train_num]
    batch_valid_sample_list = batch_train_valid_sample_list[train_num:]
    train_sample_list.extend(batch_train_sample_list)
    valid_sample_list.extend(batch_valid_sample_list)

    msgpack_numpy.dump((train_sample_list, valid_sample_list), open(train_valid_sample_list_file, 'wb'))
Code example #8
 def add_one_new_pic(self, pic_path, label):
     current_day = get_current_day()
     log_file = open(os.path.join(log_dir, current_day+'.txt'), 'a')
     try:
         # The data has already been resized to the required size when it is read in
         result = self.extract_pic_feature(pic_path)
         if result is None:
             return False
         face_pic, pic_feature = result
         self.add_one_pic(pic_feature, label)
         pic_name = os.path.split(pic_path)[1]
         this_person_pic_folder = os.path.join(self.all_pic_data_folder, label)
         this_person_feature_folder = os.path.join(self.all_pic_feature_data_folder, label)
         if not os.path.exists(this_person_pic_folder):
             os.makedirs(this_person_pic_folder)
         if not os.path.exists(this_person_feature_folder):
             os.makedirs(this_person_feature_folder)
         # Store the feature for this picture directly, and also save the image file
         this_pic_feature_name = os.path.join(this_person_feature_folder, pic_name + '.p')
         msgpack_numpy.dump(pic_feature, open(this_pic_feature_name, 'wb'))
         this_pic_face_name = os.path.join(this_person_pic_folder, pic_name + '.jpg')
         cv2.imwrite(this_pic_face_name, face_pic)
         log_file.write('\t'.join(map(str, [pic_path, this_pic_face_name]))+'\n')
         return True
     except:
         traceback.print_exc()
         return False
Code example #9
 def save_pic_feature(self, pic_path, person_name):
     # Generate the feature for an existing image file and save it under the designated folder; used when an administrator adds new pictures (after a new picture is added, its feature is extracted and saved to the designated folder)
     person_pic_folder_path = os.path.join(self.all_pic_data_folder,
                                           person_name)
     person_feature_folder_path = os.path.join(
         self.all_pic_feature_data_folder, person_name)
     if not os.path.exists(person_pic_folder_path):
         os.makedirs(person_pic_folder_path)
     if not os.path.exists(person_feature_folder_path):
         os.makedirs(person_feature_folder_path)
     pic_name = os.path.split(pic_path)[-1]
     # Feature file
     person_feature_path = os.path.join(person_feature_folder_path,
                                        pic_name)
     # Face image file
     person_pic_path = os.path.join(person_pic_folder_path, pic_name)
     result = extract_feature_from_binary_data(open(pic_path, 'rb'))
     if result is None:
         return
     face_num, all_frames, all_feature = result
     biggest_face_index = find_big_face(all_frames)
     pic_frame = all_frames[biggest_face_index]
     pic_feature = all_feature[biggest_face_index]
     x, y, width, height = pic_frame
     face_pic = cv2.imread(pic_path)[y:y + height, x:x + width, :]
     cv2.imwrite(person_pic_path, face_pic)
     msgpack_numpy.dump(pic_feature, open(person_feature_path, 'wb'))
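
find_big_face is referenced above but not defined in this snippet; a plausible sketch, assuming each frame is an (x, y, width, height) tuple and the "biggest" face is the one with the largest area:

def find_big_face(all_frames):
    # Hypothetical helper: return the index of the frame with the largest area.
    biggest_index = 0
    biggest_area = 0
    for index, (x, y, width, height) in enumerate(all_frames):
        area = width * height
        if area > biggest_area:
            biggest_area = area
            biggest_index = index
    return biggest_index
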
Code example #10
def create_train_valid_data(folder='/data/liubo/face/crop_face'):
    # Train the face verification model from the data that already exists
    person_list = os.listdir(folder)
    path_feature_dic = {}  #
    for person in person_list:
        print person
        person_path = os.path.join(folder, person)
        pic_feature_list = os.listdir(person_path)
        for pic_feature_path in pic_feature_list:
            pic_feature_path = os.path.join(person_path, pic_feature_path)
            pic_feature = extract_feature_from_file(pic_feature_path)
            path_feature_dic[pic_feature_path] = pic_feature
    msgpack_numpy.dump(path_feature_dic, open(feature_pack_file, 'wb'))
Code example #11
def create_train_valid_data(folder='/data/liubo/face/crop_face'):
    # Train the face verification model from the data that already exists
    person_list = os.listdir(folder)
    path_feature_dic = {}  #
    for person in person_list:
        print person
        person_path = os.path.join(folder, person)
        pic_feature_list = os.listdir(person_path)
        for pic_feature_path in pic_feature_list:
            pic_feature_path = os.path.join(person_path, pic_feature_path)
            pic_feature = extract_feature_from_file(pic_feature_path)
            path_feature_dic[pic_feature_path] = pic_feature
    msgpack_numpy.dump(path_feature_dic, open(feature_pack_file, 'wb'))
Code example #12
def extract_verif_feature():
    verif_path_feature_dic = {} # {person:[feature1,feature2,...,]}
    # path_feature_dic = msgpack_numpy.load(open(originalimages_verif_fc7_path_feature, 'rb'))
    path_set = set()
    for line in open(pair_file):
        tmp = line.rstrip().split()
        path_set.add(tmp[0])
        path_set.add(tmp[1])

    model, get_Conv_FeatureMap = load_model(layer_index=-5)
    for pic_path in path_set:
        fine_tune_feature = extract(pic_path, get_Conv_FeatureMap, (224, 224, 3))
        verif_path_feature_dic[pic_path] = fine_tune_feature
    msgpack_numpy.dump(verif_path_feature_dic, open(feature_pack_file, 'wb'))
Code example #13
def extract_lfw_feature():
    lfw_feature_dic = {} # {person:[feature1,feature2,...,]}
    person_list = os.listdir(lfw_folder)
    for person_index, person in enumerate(person_list):
        print person_index, person
        person_path = os.path.join(lfw_folder, person)
        pic_list = os.listdir(person_path)
        this_person_feature_list = []
        for pic in pic_list:
            pic_path = os.path.join(person_path, pic)
            this_feature = extract(pic_path)
            this_person_feature_list.append((this_feature, os.path.join(person, pic)))
        lfw_feature_dic[person] = this_person_feature_list
    msgpack_numpy.dump(lfw_feature_dic, open(feature_pack_file, 'wb'))
Code example #14
def extract_lfw_feature():
    lfw_feature_dic = {}  # {person:[feature1,feature2,...,]}
    person_list = os.listdir(lfw_folder)
    for person_index, person in enumerate(person_list):
        print person_index, person
        person_path = os.path.join(lfw_folder, person)
        pic_list = os.listdir(person_path)
        this_person_feature_list = []
        for pic in pic_list:
            pic_path = os.path.join(person_path, pic)
            this_feature = extract(pic_path)
            this_person_feature_list.append(
                (this_feature, os.path.join(person, pic)))
        lfw_feature_dic[person] = this_person_feature_list
    msgpack_numpy.dump(lfw_feature_dic, open(feature_pack_file, 'wb'))
Code example #15
def main():
    folder = '/data/hanlin'
    person_path_dic = load_one_deep_path(folder)
    sample_list, person_num = person_path_dic_trans(person_path_dic)
    model, get_Conv_FeatureMap = load_model(output_layer_index=18)
    data = []
    label = []
    start = time()
    for pic_path, person_index in sample_list:
        feature_vector = extract(pic_path, get_Conv_FeatureMap, pic_shape)[0]
        data.append(feature_vector)
        label.append(person_index)
    end = time()
    print(end - start)
    msgpack_numpy.dump((data, label), open('hanlin.p', 'wb'))
Code example #16
def main():
    folder = '/data/hanlin'
    person_path_dic = load_one_deep_path(folder)
    sample_list, person_num = person_path_dic_trans(person_path_dic)
    model, get_Conv_FeatureMap = load_model(output_layer_index=18)
    data = []
    label = []
    start = time()
    for pic_path, person_index in sample_list:
        feature_vector = extract(pic_path, get_Conv_FeatureMap, pic_shape)[0]
        data.append(feature_vector)
        label.append(person_index)
    end = time()
    print (end - start)
    msgpack_numpy.dump((data, label), open('hanlin.p', 'wb'))
Code example #17
def extract_verif_feature():
    verif_path_feature_dic = {} # {person:[feature1,feature2,...,]}
    # path_feature_dic = msgpack_numpy.load(open(originalimages_verif_fc7_path_feature, 'rb'))
    path_set = set()
    for line in open(pair_file):
        tmp = line.rstrip().split()
        path_set.add(tmp[0])
        path_set.add(tmp[1])

    model, get_Conv_FeatureMap = load_model(layer_index=-5)
    print model.summary()
    for pic_path in path_set:
        fine_tune_feature = extract(pic_path, get_Conv_FeatureMap, (224, 224, 3))
        verif_path_feature_dic[pic_path] = fine_tune_feature
    msgpack_numpy.dump(verif_path_feature_dic, open(feature_pack_file, 'wb'))
Code example #18
def train_valid_verif_model():
    all_data = []
    all_label = []
    all_pic_path_list = []
    count = 0
    for line in open(pair_file):
        if count % 100 == 0:
            print count
        count += 1
        tmp = line.rstrip().split()
        if len(tmp) == 3:
            path1 = tmp[0]
            path2 = tmp[1]
            if (os.path.exists(path1)) and (os.path.exists(path2)):
                feature1 = extract_feature_from_file(path1)
                feature2 = extract_feature_from_file(path2)
                predicts = pw.cosine_similarity(feature1, feature2)
                all_data.append(predicts)
                all_label.append(int(tmp[2]))
    msgpack_numpy.dump((all_data, all_label, all_pic_path_list),
                       open(feature_pack_file, 'wb'))
    (all_data, all_label,
     all_pic_path_list) = msgpack_numpy.load(open(feature_pack_file, 'rb'))
    all_data = np.asarray(all_data)
    data = np.reshape(all_data,
                      newshape=(all_data.shape[0], all_data.shape[2]))
    label = np.asarray(all_label)
    print data.shape, label.shape

    kf = KFold(len(label), n_folds=10)
    all_acc = []
    for (train, valid) in kf:
        train_data = data[train]
        valid_data = data[valid]
        train_label = label[train]
        valid_label = label[valid]
        clf = LinearSVC()
        clf.fit(train_data, train_label)
        acc = accuracy_score(valid_label, clf.predict(valid_data))
        roc_auc = roc_auc_score(valid_label, clf.predict(valid_data))
        all_acc.append(acc)
        print acc, roc_auc
    print np.mean(all_acc)

    clf = LinearSVC()
    clf.fit(data, label)
    pdb.set_trace()
    cPickle.dump(clf, open(verification_model_file, 'wb'))
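
KFold(len(label), n_folds=10) is the pre-0.18 scikit-learn API (from sklearn.cross_validation); with a current scikit-learn the equivalent loop looks roughly like the sketch below, shown on random stand-in data rather than as a drop-in replacement:

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.svm import LinearSVC

# Random stand-in: 200 one-dimensional similarity features with binary labels.
data = np.random.rand(200, 1)
label = np.random.randint(0, 2, size=200)

kf = KFold(n_splits=10)
all_acc = []
for train, valid in kf.split(data):
    clf = LinearSVC()
    clf.fit(data[train], label[train])
    all_acc.append(accuracy_score(label[valid], clf.predict(data[valid])))
print(np.mean(all_acc))
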
Code example #19
def load_data(result_file, pack_file):
    person_feature_dic = {} # {person_name:[(pic_name, pic_feature),...,(pic_name, pic_feature)]}
    for line in open(result_file):
        tmp = line.rstrip().split('\t')
        if len(tmp) == 2:
            try:
                pic_path = tmp[0].split('/')
                person_name = pic_path[-2]
                pic_name = pic_path[-1]
                feature = msgpack_numpy.loads(base64.b64decode(tmp[1]))
                feature_list = person_feature_dic.get(person_name, [])
                feature_list.append((pic_name, feature))
                person_feature_dic[person_name] = feature_list
            except:
                print tmp
                continue
    msgpack_numpy.dump(person_feature_dic, open(pack_file, 'wb'))
Code example #20
def create_lfw_pair(folder, pair_file):
    person_list = os.listdir(folder)
    tmp_list = []
    # Select positive samples
    for person in person_list:
        person_path = os.path.join(folder, person)
        pic_list = map(lambda x: os.path.join(os.path.join(person_path, x)),
                       os.listdir(person_path))
        if len(pic_list) > 2:
            # Pick one positive pair for each person
            np.random.shuffle(pic_list)
            tmp_list.append((pic_list[0], pic_list[1], True))
    # Select the same number of negative samples
    np.random.shuffle(person_list)
    person_num = len(person_list)
    positive_num = len(tmp_list)
    count = 0
    for person_index, person in enumerate(person_list):
        this_person_path = os.path.join(folder, person)
        pic_list = map(
            lambda x: os.path.join(os.path.join(this_person_path, x)),
            os.listdir(this_person_path))
        other_person = person_list[(person_index + 1) % person_num]
        other_person_path = os.path.join(folder, other_person)
        other_pic_list = map(
            lambda x: os.path.join(os.path.join(other_person_path, x)),
            os.listdir(other_person_path))
        np.random.shuffle(pic_list)
        np.random.shuffle(other_pic_list)
        tmp_list.append((pic_list[0], other_pic_list[0], False))
        count += 1
        if count == positive_num:
            break
    pair_list = []
    label_list = []
    np.random.shuffle(tmp_list)
    num = len(tmp_list)
    tmp_list = tmp_list[:num / 100 * 100]
    for element in tmp_list:
        pic_path1, pic_path2, label = element
        pair_list.append(pic_path1)
        pair_list.append(pic_path2)
        label_list.append(label)
    print len(tmp_list), len(pair_list), len(label_list)
    msgpack_numpy.dump((pair_list, label_list), open(pair_file, 'wb'))
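
Note that pair_list stores the two pictures of every pair back to back, so it is twice as long as label_list; a small sketch of re-pairing them when the pack is read back (the file name is only a placeholder for the pair_file written above):

import msgpack_numpy

pair_list, label_list = msgpack_numpy.load(open('lfw_pair.p', 'rb'))  # placeholder path
for pair_index, label in enumerate(label_list):
    pic_path1 = pair_list[2 * pair_index]
    pic_path2 = pair_list[2 * pair_index + 1]
    # label is True when pic_path1 and pic_path2 show the same person.
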
Code example #21
def train_valid_verif_model():
    all_data = []
    all_label = []
    all_pic_path_list = []
    count = 0
    for line in open(pair_file):
        if count % 100 == 0:
            print count
        count += 1
        tmp = line.rstrip().split()
        if len(tmp) == 3:
            path1 = tmp[0]
            path2 = tmp[1]
            if (os.path.exists(path1)) and (os.path.exists(path2)):
                feature1 = extract_feature_from_file(path1)
                feature2 = extract_feature_from_file(path2)
                predicts = pw.cosine_similarity(feature1, feature2)
                all_data.append(predicts)
                all_label.append(int(tmp[2]))
    msgpack_numpy.dump((all_data, all_label, all_pic_path_list), open(feature_pack_file, 'wb'))
    (all_data, all_label, all_pic_path_list) = msgpack_numpy.load(open(feature_pack_file, 'rb'))
    all_data = np.asarray(all_data)
    data = np.reshape(all_data, newshape=(all_data.shape[0], all_data.shape[2]))
    label = np.asarray(all_label)
    print data.shape, label.shape

    kf = KFold(len(label), n_folds=10)
    all_acc = []
    for (train, valid) in kf:
        train_data = data[train]
        valid_data = data[valid]
        train_label = label[train]
        valid_label = label[valid]
        clf = LinearSVC()
        clf.fit(train_data, train_label)
        acc = accuracy_score(valid_label, clf.predict(valid_data))
        roc_auc = roc_auc_score(valid_label, clf.predict(valid_data))
        all_acc.append(acc)
        print acc, roc_auc
    print np.mean(all_acc)

    clf = LinearSVC()
    clf.fit(data, label)
    pdb.set_trace()
    cPickle.dump(clf, open(verification_model_file, 'wb'))
Code example #22
 def cb(output):
     predicting_result = output.result()
     if predicting_result is not None:
         self.s2c_socket.send_multipart(
             [nameClient(index),
              dump(predicting_result['action'])])
     training_data = self.parse_memory(index, obs, predicting_result)
     if training_data is not None:
         self.training_queue.put(training_data)
Code example #23
def create_sample_list(root_folder, train_valid_sample_list_file, verif_sample_list_file):
    person_list = os.listdir(root_folder)
    train_valid_sample_list = []
    verif_sample_list = []
    # Hold out 2000 people for the face verification model
    for person_index, person_name in enumerate(person_list[:-2000]):
        person_path = os.path.join(root_folder, person_name)
        pic_list = map(lambda y: (y, person_index), map(lambda x: os.path.join(person_path, x), os.listdir(person_path)))
        train_valid_sample_list.extend(pic_list)
    np.random.shuffle(train_valid_sample_list)
    train_num = int(len(train_valid_sample_list) * 0.8)
    train_sample_list = train_valid_sample_list[:train_num]
    valid_sample_list = train_valid_sample_list[train_num:]
    msgpack_numpy.dump((train_sample_list, valid_sample_list), open(train_valid_sample_list_file, 'wb'))
    for person_index, person_name in enumerate(person_list[-2000:]):
        person_path = os.path.join(root_folder, person_name)
        pic_list = map(lambda y: (y, person_index), map(lambda x: os.path.join(person_path, x), os.listdir(person_path)))
        verif_sample_list.extend(pic_list)
    msgpack_numpy.dump(verif_sample_list, open(verif_sample_list_file, 'wb'))
Code example #24
def main_max_min():
    lfw_folder = '/data/liubo/face/lfw_face'
    pair_file = '/data/liubo/face/lfw_pair.txt'
    same_dist_list = []
    no_same_dist_list = []
    count = 0
    for line in open(pair_file):
        tmp = line.rstrip().split()
        if len(tmp) == 3:
            person = tmp[0]
            person_path = os.path.join(lfw_folder, person)
            pic_list = os.listdir(person_path)
            if len(pic_list) == 1:
                print 'error person :', person
                continue
            else:
                np.random.shuffle(pic_list)
                this_dist = []
                for index_i in range(len(pic_list[0:10])):
                    for index_j in range(index_i + 1, len(pic_list[0:10])):
                        dist = cal_two_pic_distance(
                            os.path.join(person_path, pic_list[index_i]),
                            os.path.join(person_path, pic_list[index_j]))
                        this_dist.append(dist)
                same_dist_list.append(np.min(this_dist))

        elif len(tmp) == 4:
            person1 = tmp[0]
            person1_path = os.path.join(lfw_folder, person1)
            pic1_list = os.listdir(person1_path)
            person2 = tmp[2]
            person2_path = os.path.join(lfw_folder, person2)
            pic2_list = os.listdir(person2_path)
            if len(pic1_list) > 0 and len(pic2_list) > 0:
                np.random.shuffle(pic1_list)
                np.random.shuffle(pic2_list)
                pic_path1 = os.path.join(person1_path, pic1_list[0])
                pic_path2 = os.path.join(person2_path, pic2_list[0])
                dist = cal_two_pic_distance(pic_path1, pic_path2)
                no_same_dist_list.append(dist)

    msgpack_numpy.dump((same_dist_list, no_same_dist_list),
                       open('dist_max_min.p', 'wb'))
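
cal_two_pic_distance is not shown in this snippet; a hypothetical sketch of it, assuming an extract(pic_path) helper (as used elsewhere in these examples) that returns a 1-D feature vector, and cosine distance as the metric:

import numpy as np
import sklearn.metrics.pairwise as pw


def cal_two_pic_distance(pic_path1, pic_path2):
    # Hypothetical: extract(pic_path) is assumed to return a 1-D feature vector.
    feature1 = np.reshape(extract(pic_path1), (1, -1))
    feature2 = np.reshape(extract(pic_path2), (1, -1))
    # Cosine distance = 1 - cosine similarity.
    return 1.0 - pw.cosine_similarity(feature1, feature2)[0][0]
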
Code example #25
def extract_lfw_feature():
    lfw_feature_dic = {} # {person:[feature1,feature2,...,]}
    person_list = os.listdir(lfw_folder)
    for person_index, person in enumerate(person_list):
        print person_index, person
        person_path = os.path.join(lfw_folder, person)
        pic_list = os.listdir(person_path)
        this_person_feature_dic = {}
        for pic in pic_list:
            try:
                pic_path = os.path.join(person_path, pic)
                index = int(pic.split('.')[0].split('_')[-1])
                this_feature = extract(pic_path)
                this_person_feature_dic[index] = (this_feature, pic_path)
            except:
                traceback.print_exc()
                pdb.set_trace()
        lfw_feature_dic[person] = this_person_feature_dic
    msgpack_numpy.dump(lfw_feature_dic, open(feature_pack_file, 'wb'))
Code example #26
def create_lfw_pair(folder, pair_file):
    person_list = os.listdir(folder)
    tmp_list = []
    # Select positive samples
    for person in person_list:
        person_path = os.path.join(folder, person)
        pic_list = map(lambda x:os.path.join(os.path.join(person_path, x)), os.listdir(person_path))
        if len(pic_list) > 2:
            # Pick one positive pair for each person
            np.random.shuffle(pic_list)
            tmp_list.append((pic_list[0], pic_list[1], True))
    # Select the same number of negative samples
    np.random.shuffle(person_list)
    person_num = len(person_list)
    positive_num = len(tmp_list)
    count = 0
    for person_index, person in enumerate(person_list):
        this_person_path = os.path.join(folder, person)
        pic_list = map(lambda x:os.path.join(os.path.join(this_person_path, x)), os.listdir(this_person_path))
        other_person = person_list[(person_index+1)%person_num]
        other_person_path = os.path.join(folder, other_person)
        other_pic_list = map(lambda x:os.path.join(os.path.join(other_person_path, x)), os.listdir(other_person_path))
        np.random.shuffle(pic_list)
        np.random.shuffle(other_pic_list)
        tmp_list.append((pic_list[0], other_pic_list[0], False))
        count += 1
        if count == positive_num:
            break
    pair_list = []
    label_list = []
    np.random.shuffle(tmp_list)
    num = len(tmp_list)
    tmp_list = tmp_list[:num/100*100]
    for element in tmp_list:
        pic_path1, pic_path2, label = element
        pair_list.append(pic_path1)
        pair_list.append(pic_path2)
        label_list.append(label)
    print len(tmp_list), len(pair_list), len(label_list)
    msgpack_numpy.dump((pair_list, label_list), open(pair_file, 'wb'))
Code example #27
def feature_trans_autoencoder(src_pack_file, dst_pack_file):
    weight_file = '/data/liubo/face/annotate_face_model/skyeye_face_autoencoder.weight'
    model_file = '/data/liubo/face/annotate_face_model/skyeye_face_autoencoder.model'
    autoencoder =  model_from_json(open(model_file, 'r').read())
    autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
    autoencoder.load_weights(weight_file)
    get_Conv_FeatureMap = K.function([autoencoder.layers[0].get_input_at(False), K.learning_phase()],
                                     [autoencoder.layers[-2].get_output_at(False)])

    person_feature_dic = msgpack_numpy.load(open(src_pack_file, 'rb'))
    for person_index, person in enumerate(person_feature_dic):
        feature_list = person_feature_dic.get(person)
        for index in range(len(feature_list)):
            try:
                if feature_list[index][1] is None:
                    continue
                this_feature = np.array(feature_list[index][1][0])
                this_feature = np.reshape(this_feature, (1, this_feature.size))
                this_feature = get_Conv_FeatureMap([this_feature, 0])[0][0]
                feature_list[index][1][0] = this_feature
            except:
                traceback.print_exc()
    msgpack_numpy.dump(person_feature_dic, open(dst_pack_file, 'wb'))
Code example #28
def get_all_img_feature():
    folder = '/tmp/annotate'
    result_pic_folder = '/data/liubo/face/research_self'
    result_feature_folder = '/data/liubo/face/research_feature_self'
    person_list = os.listdir(folder)
    for person in person_list:
        person_path = os.path.join(folder, person)
        result_person_pic_folder = os.path.join(result_pic_folder, person)
        if not os.path.exists(result_person_pic_folder):
            os.makedirs(result_person_pic_folder)
        person_feature_folder = os.path.join(result_feature_folder, person)
        if not os.path.exists(person_feature_folder):
            os.makedirs(person_feature_folder)
        pic_list = os.listdir(person_path)
        for pic in pic_list:
            try:
                pic_path = os.path.join(person_path, pic)
                feature = np.asarray(extract_feature_from_file(pic_path)[0])
                shutil.copy(pic_path, os.path.join(result_person_pic_folder, pic))
                msgpack_numpy.dump(feature, open(os.path.join(person_feature_folder, pic + '.p'), 'wb'))
            except:
                traceback.print_exc()
                continue
Code example #29
 def save_pic_feature(self, pic_path, person_name):
     # Generate the feature for an existing image file and save it under the designated folder; used when an administrator adds new pictures (after a new picture is added, its feature is extracted and saved to the designated folder)
     person_pic_folder_path = os.path.join(self.all_pic_data_folder, person_name)
     person_feature_folder_path = os.path.join(self.all_pic_feature_data_folder, person_name)
     if not os.path.exists(person_pic_folder_path):
         os.makedirs(person_pic_folder_path)
     if not os.path.exists(person_feature_folder_path):
         os.makedirs(person_feature_folder_path)
     pic_name = os.path.split(pic_path)[-1]
     # Feature file
     person_feature_path = os.path.join(person_feature_folder_path, pic_name)
     # Face image file
     person_pic_path = os.path.join(person_pic_folder_path, pic_name)
     result = extract_feature_from_binary_data(open(pic_path, 'rb'))
     if result is None:
         return
     face_num, all_frames, all_feature = result
     biggest_face_index = find_big_face(all_frames)
     pic_frame = all_frames[biggest_face_index]
     pic_feature = all_feature[biggest_face_index]
     x, y, width, height = pic_frame
     face_pic = cv2.imread(pic_path)[y:y+height, x:x+width, :]
     cv2.imwrite(person_pic_path, face_pic)
     msgpack_numpy.dump(pic_feature, open(person_feature_path, 'wb'))
Code example #30
def extract_triplet_feature():
    lfw_feature_dic = msgpack_numpy.load(open(feature_pack_file, 'rb'))
    new_lfw_feature_dic = {}
    model_file = '/data/liubo/face/vgg_face_model/annotate_siamese_graph.model'
    weight_file = '/data/liubo/face/vgg_face_model/annotate_siamese_graph.weight'
    model = model_from_json(open(model_file, 'r').read())
    opt = Adam()
    model.compile(optimizer=opt, loss=['categorical_crossentropy'])
    model.load_weights(weight_file)

    # pdb.set_trace()
    get_Conv_FeatureMap = K.function([model.layers[2].layers[0].get_input_at(False), K.learning_phase()],
                                     [model.layers[2].layers[-1].get_output_at(False)])
    for person in lfw_feature_dic:
        # print person
        this_person_feature_list = lfw_feature_dic.get(person)
        this_person_triplet_feature_list = []
        for feature, path in this_person_feature_list:
            feature = np.reshape(feature, (1, feature.size))
            new_feature = get_Conv_FeatureMap([feature, 0])[0].copy()
            this_person_triplet_feature_list.append((new_feature, path))

        new_lfw_feature_dic[person] = this_person_triplet_feature_list
    msgpack_numpy.dump(new_lfw_feature_dic, open(triplet_feature_pack_file, 'wb'))
Code example #31
    def run(self):
        self.player = Environment(self.index * 113)
        context = zmq.Context()
        self.c2s_socket = context.socket(zmq.PUSH)
        self.c2s_socket.setsockopt(zmq.IDENTITY, self.identity)
        self.c2s_socket.connect(self.c2s)

        self.s2c_socket = context.socket(zmq.DEALER)
        self.s2c_socket.setsockopt(zmq.IDENTITY, self.identity)
        self.s2c_socket.connect(self.s2c)
        while True:
            obs = self.player.current_state()
            self.c2s_socket.send(dump((self.index, obs)), copy=False)
            if obs is not None:
                action = load(self.s2c_socket.recv(copy=False).bytes)
                self.player.action(action)
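
The server side of this PUSH/DEALER setup is not shown; a rough sketch of a matching PULL + ROUTER endpoint, assuming the client's dump/load helpers are msgpack-based (as the msgpack_numpy calls elsewhere in these examples suggest) and with purely illustrative addresses and identities:

import zmq
import msgpack_numpy


def name_client(index):
    # Hypothetical: must produce the same bytes the client set as its zmq.IDENTITY
    # (compare nameClient(index) in code example #22).
    return ('client-%d' % index).encode('utf-8')


context = zmq.Context()

# Collects the (index, observation) messages pushed by the clients.
c2s_socket = context.socket(zmq.PULL)
c2s_socket.bind('ipc://c2s')  # illustrative address

# Routes each action back to the client whose identity is given as the first frame.
s2c_socket = context.socket(zmq.ROUTER)
s2c_socket.bind('ipc://s2c')  # illustrative address

while True:
    index, obs = msgpack_numpy.loads(c2s_socket.recv())
    action = 0  # placeholder: a real server would run its policy on obs here
    s2c_socket.send_multipart([name_client(index), msgpack_numpy.dumps(action)])
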
Code example #32
    def run(self):

        print "My pid is :%d\n" % os.getpid()

        self.player = Environment(self.index * 113)
        context = zmq.Context()
        self.c2s_socket = context.socket(zmq.PUSH)
        self.c2s_socket.setsockopt(zmq.IDENTITY, self.identity)
        self.c2s_socket.connect(self.c2s)

        self.s2c_socket = context.socket(zmq.DEALER)
        self.s2c_socket.setsockopt(zmq.IDENTITY, self.identity)
        self.s2c_socket.connect(self.s2c)
        rew, isover, frag, kdr = None, False, 0, 0

        while True:
            frame = self.player.current_state()
            self.c2s_socket.send(dump(
                (self.identity, [frame], rew, isover, frag, kdr)),
                                 copy=False)  #rew is last action's reward
            action = load(self.s2c_socket.recv(copy=False).bytes)
            rew, isover, frag, kdr = self.player.action(action)
            if isover:
                self.player.reset_stat()
Code example #33
# logger_error = logging.getLogger('errorhandler')


root_folder = '/data/liubo/face/all_pic_data/FaceScrub'
person_list = os.listdir(root_folder)
all_data = []
all_label = []
index = 0
for person in person_list[:20]:
    print person, index
    pic_list = os.listdir(os.path.join(root_folder, person))
    for pic in pic_list:
        try:
            pic_path = os.path.join(root_folder, person, pic)
            im = cv2.resize(cv2.imread(pic_path), (150, 150))
            all_data.append(im)
            all_label.append(index)
        except:
            traceback.print_exc()
            continue
    index += 1

all_data = np.asarray(all_data)
all_label = np.asarray(all_label)


train_data, test_data, train_label, test_label = train_test_split(all_data, all_label)
print train_data.shape, train_label.shape, test_data.shape, test_label.shape
msgpack_numpy.dump((train_data, test_data, train_label, test_label),
                   open('/data/liubo/face/all_pic_data/FaceScrub.p', 'wb'))
Code example #34
def find_max_min():
    # Within the same person find the least similar pairs; across different people find the most similar pairs
    lfw_feature_dic = msgpack_numpy.load(open(feature_pack_file, 'rb'))
    person_list = lfw_feature_dic.keys()
    same_person_score = []
    same_person_score_pair_dic = {
    }  # {score:[(path1,path2), ...,(path1,path2)]}
    no_same_person_score = []
    no_same_person_score_pair_dic = {
    }  # {score:[(path1,path2), ...,(path1,path2)]}
    heapq.heapify(same_person_score)
    pair_threshold = 3000

    for person_index, person in enumerate(person_list):
        start = time()
        path_feature_list = lfw_feature_dic.get(person)
        # Enumerate all possible pairs within this person --- the smaller the score the better (the least similar photos of the same person)
        # Each time the largest score is removed and a smaller one is added, so the score is negated when pushed; the heap top is then the pair with the largest original score
        length = len(path_feature_list)
        for index_i in range(length):
            for index_j in range(index_i, length):
                feature1, path1 = path_feature_list[index_i]
                feature2, path2 = path_feature_list[index_j]
                feature1 = np.reshape(feature1,
                                      newshape=(1, feature1.shape[0]))
                feature2 = np.reshape(feature2,
                                      newshape=(1, feature2.shape[0]))
                this_score = 0 - pw.cosine_similarity(feature1, feature2)[0][0]
                if len(same_person_score) > pair_threshold:
                    top_item = same_person_score[0]
                    if this_score < top_item:  # even less similar, add it
                        heapq.heappop(same_person_score)
                        heapq.heappush(same_person_score, this_score)
                        # Remove the old pair and add the current pair (the same score may correspond to several pairs)
                        if top_item in same_person_score_pair_dic:
                            same_person_score_pair_dic.pop(top_item)
                        pair_list = same_person_score_pair_dic.get(
                            this_score, [])
                        pair_list.append((path1, path2))
                        same_person_score_pair_dic[this_score] = pair_list
                else:
                    heapq.heappush(same_person_score, this_score)
                    pair_list = same_person_score_pair_dic.get(this_score, [])
                    pair_list.append((path1, path2))
                    same_person_score_pair_dic[this_score] = pair_list

        # Enumerate all possible different-person (dissimilar) pairs

        for other_person_index, other_person in enumerate(
                person_list[person_index + 1:], start=person_index + 1):
            other_path_feature_list = lfw_feature_dic.get(other_person)
            if other_person == person:
                continue
            other_length = len(other_path_feature_list)
            for index_i in range(length):
                for index_j in range(other_length):
                    feature1, path1 = path_feature_list[index_i]
                    feature2, path2 = other_path_feature_list[index_j]
                    feature1 = np.reshape(feature1,
                                          newshape=(1, feature1.shape[0]))
                    feature2 = np.reshape(feature2,
                                          newshape=(1, feature2.shape[0]))
                    this_score = pw.cosine_similarity(feature1, feature2)[0][0]
                    if len(no_same_person_score) > pair_threshold:
                        top_item = no_same_person_score[0]
                        if this_score < top_item:  # even more similar, add it
                            heapq.heappop(no_same_person_score)
                            heapq.heappush(no_same_person_score, this_score)
                            # Remove the old pair and add the current pair (the same score may correspond to several pairs)
                            if top_item in no_same_person_score_pair_dic:
                                no_same_person_score_pair_dic.pop(top_item)
                            pair_list = no_same_person_score_pair_dic.get(
                                this_score, [])
                            pair_list.append((path1, path2))
                            no_same_person_score_pair_dic[
                                this_score] = pair_list
                    else:
                        heapq.heappush(no_same_person_score, this_score)
                        pair_list = no_same_person_score_pair_dic.get(
                            this_score, [])
                        pair_list.append((path1, path2))
                        no_same_person_score_pair_dic[this_score] = pair_list
        end = time()
        print person_index, person, (end - start), length
    msgpack_numpy.dump((same_person_score_pair_dic, same_person_score,
                        no_same_person_score_pair_dic, no_same_person_score),
                       open(new_pair_pack_file, 'wb'))
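
The heap bookkeeping above relies on Python's heapq being a min-heap: by pushing negated scores, the heap root corresponds to the largest original score, i.e. the candidate to evict when a better pair arrives. A tiny standalone illustration of that top-k pattern (keeping the k smallest scores):

import heapq

scores = [0.91, 0.20, 0.75, 0.05, 0.63]
k = 3
heap = []
for score in scores:
    if len(heap) < k:
        heapq.heappush(heap, -score)  # store negated scores
    elif -score > heap[0]:  # score is smaller than the largest score currently kept
        heapq.heapreplace(heap, -score)  # evict the largest, keep the new one
print(sorted(-s for s in heap))  # [0.05, 0.2, 0.63]
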
Code example #35
def train_valid_verif_model():
    all_data = []
    all_label = []
    all_pic_path_list = []
    count = 0
    path_feature_dic = msgpack_numpy.load(open(feature_pack_file, 'rb'))
    not_in = 0
    not_in_pair = {}
    for line in open(pair_file):
        if count % 100 == 0:
            print count
        count += 1
        tmp = line.rstrip().split()
        if len(tmp) == 3:
            path1 = tmp[0]
            path2 = tmp[1]
            label = int(tmp[2])
            if path1 in path_feature_dic and path2 in path_feature_dic:
                try:
                    feature1 = np.asarray(path_feature_dic.get(path1))
                    feature2 = np.asarray(path_feature_dic.get(path2))
                    predicts = pw.cosine_similarity(feature1, feature2)
                    all_data.append(predicts)
                    all_label.append(label)
                    all_pic_path_list.append((path1, path2))
                except:
                    traceback.print_exc()
            else:
                traceback.print_exc()
    msgpack_numpy.dump((all_data, all_label, all_pic_path_list), open(feature_pack_file, 'wb'))

    (all_data, all_label, all_pic_path_list) = msgpack_numpy.load(open(feature_pack_file, 'rb'))
    pdb.set_trace()
    all_data = np.asarray(all_data)
    all_data = np.reshape(all_data, newshape=(all_data.shape[0], all_data.shape[2]))
    all_label = np.asarray(all_label)
    all_pic_path_list = np.asarray(all_pic_path_list)
    print all_data.shape, all_label.shape

    all_acc = []

    kf = KFold(n_splits=10)
    all_acc = []
    f = open('research_verif_result.txt', 'w')
    for k, (train, valid) in enumerate(kf.split(all_data, all_label, all_pic_path_list)):
        train_data = all_data[train]
        valid_data = all_data[valid]
        train_label = all_label[train]
        valid_label = all_label[valid]
        train_path_list = all_pic_path_list[train]
        valid_path_list = all_pic_path_list[valid]

        clf = LinearSVC()
        clf.fit(train_data, train_label)
        acc = accuracy_score(valid_label, clf.predict(valid_data))
        for k in range(len(valid_path_list)):
            f.write(os.path.split(valid_path_list[k][0])[1] + '\t' + os.path.split(valid_path_list[k][1])[1] +
                    '\t' + str(valid_data[k][0])+ '\t' + str(valid_label[k]) + '\n')
        all_acc.append(acc)
        print acc
    print 'mean_acc :', np.mean(all_acc)
    f.close()
    clf = LinearSVC()
    clf.fit(all_data, all_label)
    pdb.set_trace()
    cPickle.dump(clf, open(verification_model_file, 'wb'))
Code example #36
def main_feature():
    lfw_folder = '/data/liubo/face/lfw_face'
    pair_file = '/data/liubo/face/lfw_pair.txt'
    data = []
    label = []
    for line in open(pair_file):
        tmp = line.rstrip().split()
        if len(tmp) == 3:
            person = tmp[0]
            person_path = os.path.join(lfw_folder, person)
            pic_list = os.listdir(person_path)
            if len(pic_list) == 1:
                print 'error person :', person
                continue
            else:
                np.random.shuffle(pic_list)
                pic_path1 = os.path.join(person_path, pic_list[0])
                pic_path2 = os.path.join(person_path, pic_list[1])
                im1 = np.transpose(
                    np.reshape(imresize(imread(pic_path1), size=(78, 62, 3)),
                               (1, 78, 62, 3)), (0, 3, 1, 2))
                im2 = np.transpose(
                    np.reshape(imresize(imread(pic_path2), size=(78, 62, 3)),
                               (1, 78, 62, 3)), (0, 3, 1, 2))
                # im1 = np.transpose(np.reshape(imresize(imread(pic_path1), size=(50, 50, 3)), (1, 50, 50, 3)), (0, 3, 1, 2))
                # im2 = np.transpose(np.reshape(imresize(imread(pic_path2), size=(50, 50, 3)), (1, 50, 50, 3)), (0, 3, 1, 2))
                im1_feature = get_Conv_FeatureMap([im1, 0])[0]
                im2_feature = get_Conv_FeatureMap([im2, 0])[0]
                this_data = []
                this_data.extend(list(im1_feature[0]))
                this_data.extend(list(im2_feature[0]))
                data.append(this_data)
                label.append(0)
        elif len(tmp) == 4:
            person1 = tmp[0]
            person1_path = os.path.join(lfw_folder, person1)
            pic1_list = os.listdir(person1_path)
            person2 = tmp[2]
            person2_path = os.path.join(lfw_folder, person2)
            pic2_list = os.listdir(person2_path)
            if len(pic1_list) > 0 and len(pic2_list) > 0:
                np.random.shuffle(pic1_list)
                np.random.shuffle(pic2_list)
                pic_path1 = os.path.join(person1_path, pic1_list[0])
                pic_path2 = os.path.join(person2_path, pic2_list[0])
                im1 = np.transpose(
                    np.reshape(imresize(imread(pic_path1), size=(78, 62, 3)),
                               (1, 78, 62, 3)), (0, 3, 1, 2))
                im2 = np.transpose(
                    np.reshape(imresize(imread(pic_path2), size=(78, 62, 3)),
                               (1, 78, 62, 3)), (0, 3, 1, 2))
                # im1 = np.transpose(np.reshape(imresize(imread(pic_path1), size=(50, 50, 3)), (1, 50, 50, 3)), (0, 3, 1, 2))
                # im2 = np.transpose(np.reshape(imresize(imread(pic_path2), size=(50, 50, 3)), (1, 50, 50, 3)), (0, 3, 1, 2))
                im1_feature = get_Conv_FeatureMap([im1, 0])[0]
                im2_feature = get_Conv_FeatureMap([im2, 0])[0]
                this_data = []
                this_data.extend(list(im1_feature[0]))
                this_data.extend(list(im2_feature[0]))
                data.append(this_data)
                label.append(1)
    msgpack_numpy.dump((data, label), open('lfw_data_label.p', 'wb'))
Code example #37
def train_valid_verif_model():
    all_data = []
    all_label = []
    all_pic_path_list = []
    count = 0
    path_feature_dic = msgpack_numpy.load(open(feature_pack_file, 'rb'))
    not_in = 0
    not_in_pair = {}
    for line in open(pair_file):
        if count % 100 == 0:
            print count
        count += 1
        tmp = line.rstrip().split()
        if len(tmp) == 3:
            path1 = tmp[0]
            path2 = tmp[1]
            label = int(tmp[2])
            if path1 in path_feature_dic and path2 in path_feature_dic:
                try:
                    feature1 = np.asarray(path_feature_dic.get(path1))
                    feature2 = np.asarray(path_feature_dic.get(path2))
                    predicts = pw.cosine_similarity(feature1, feature2)
                    all_data.append(predicts)
                    all_label.append(label)
                    all_pic_path_list.append((path1, path2))
                except:
                    traceback.print_exc()
            else:
                traceback.print_exc()
    msgpack_numpy.dump((all_data, all_label, all_pic_path_list),
                       open(feature_pack_file, 'wb'))

    (all_data, all_label,
     all_pic_path_list) = msgpack_numpy.load(open(feature_pack_file, 'rb'))
    pdb.set_trace()
    all_data = np.asarray(all_data)
    all_data = np.reshape(all_data,
                          newshape=(all_data.shape[0], all_data.shape[2]))
    all_label = np.asarray(all_label)
    all_pic_path_list = np.asarray(all_pic_path_list)
    print all_data.shape, all_label.shape

    all_acc = []

    kf = KFold(n_splits=10)
    all_acc = []
    f = open('research_verif_result.txt', 'w')
    for k, (train, valid) in enumerate(
            kf.split(all_data, all_label, all_pic_path_list)):
        train_data = all_data[train]
        valid_data = all_data[valid]
        train_label = all_label[train]
        valid_label = all_label[valid]
        train_path_list = all_pic_path_list[train]
        valid_path_list = all_pic_path_list[valid]

        clf = LinearSVC()
        clf.fit(train_data, train_label)
        acc = accuracy_score(valid_label, clf.predict(valid_data))
        for k in range(len(valid_path_list)):
            f.write(
                os.path.split(valid_path_list[k][0])[1] + '\t' +
                os.path.split(valid_path_list[k][1])[1] + '\t' +
                str(valid_data[k][0]) + '\t' + str(valid_label[k]) + '\n')
        all_acc.append(acc)
        print acc
    print 'mean_acc :', np.mean(all_acc)
    f.close()
    clf = LinearSVC()
    clf.fit(all_data, all_label)
    pdb.set_trace()
    cPickle.dump(clf, open(verification_model_file, 'wb'))
Code example #38
    def recognize_online_cluster(self, image, image_id):
        '''
            :param image: recognize the received picture and add it to the LSH Forest; compute proba from the distance (different distances correspond to different accuracies, and the threshold is computed from the existing dist);
                            together with the preset threshold, decide whether this is a newly appearing person, definitely an already known person, or only possibly an already known person
            :return:
        '''
        start = time.time()
        need_add = False
        need_save = False
        current_day = get_current_day()
        log_file = open(os.path.join(log_dir, current_day + '.txt'), 'a')
        log_file.write(
            '\t'.join(map(str, ["receive image", image_id,
                                time.time()])) + '\n')
        feature_str = ''
        try:
            image = base64.decodestring(image)
            image = zlib.decompress(image)
            im = cv2.imdecode(np.fromstring(image, dtype=np.uint8), 1)
            log_file.write(
                '\t'.join(map(str, ['shape :', im.shape[0], im.shape[1]])) +
                '\n')
            # Filter by image size
            if im.shape[0] < size_threshold or im.shape[1] < size_threshold:
                log_file.write('\t'.join(
                    map(str, [
                        'stat recognize_time :', (time.time() -
                                                  start), 'small_size'
                    ])) + '\n')
                log_file.close()
                return self.unknown, 1.0, feature_str, need_save
            # Filter by sharpness (blur detection)
            blur_sign, blur_var = is_blur(cv2.resize(im, (96, 96)))
            if blur_sign:
                log_file.write('\t'.join(
                    map(str, [
                        'stat recognize_time :',
                        (time.time() - start), 'blur_filter', blur_var
                    ])) + '\n')
                log_file.close()
                return self.unknown, 1.0, feature_str, need_save
            # Save the picture that was sent over
            # img_file = '/tmp/research_face/%s.jpg' %image_id
            time_slot = get_time_slot(image_id)
            if time_slot is None:
                time_slot = 'error'
            time_slot_dir = os.path.join(tmp_face_dir, time_slot)
            if not os.path.exists(time_slot_dir):
                os.makedirs(time_slot_dir)
            img_file = os.path.join(time_slot_dir, image_id + '.jpg')
            cv2.imwrite(img_file, im)
        except:
            traceback.print_exc()
            log_file.close()
            return self.unknown, 1.0, feature_str, need_save
        try:
            # Pipeline: find the closest pictures; compute prob; online clustering; add to the LSH Forest
            result = self.extract_pic_feature(img_file)
            if result is None:
                log_file.write('\t'.join(
                    map(str, [
                        'stat not_find_face', 'time :', (time.time() - start)
                    ])) + '\n')
                log_file.close()
                return self.unknown, 1.0, feature_str, need_save
            face_pic, im_feature = result

            try:
                # nearest_sim_list has the same format as dist_label_list, so the two lists can be merged and evaluated together (this way the time factor does not need to be considered)
                # After the person is recognized, the name and feature are put into self.nearest
                nearest_sim_list = self.cal_nearest_sim(
                    current_feature=im_feature)
            except:
                traceback.print_exc()
                nearest_sim_list = []
            log_file.write('\t'.join(
                map(str, ['nearest_sim_list :',
                          map(str, nearest_sim_list)])) + '\n')
            feature_str = base64.b64encode(msgpack_numpy.dumps(im_feature))
            log_file.write('\t'.join(
                map(str, ['extract_feature_time :', (time.time() - start)])) +
                           '\n')
            # Find the closest pictures --- use the LSH Forest to find the 10 nearest pictures, then compute the distance to each

            tmp_list = self.find_k_neighbors_with_lsh(im_feature)
            nearest_sim_list.sort(key=lambda x: x[0], reverse=True)
            nearest_sim_list.extend(tmp_list)
            dist_label_list = nearest_sim_list[:]

            # Evaluate the candidates
            log_file.write('\t'.join(
                map(str, ['dist_label_list :',
                          map(str, dist_label_list)])) + '\n')
            if dist_label_list is None:
                this_id = self.must_be_not_same_id
                this_label = self.new_person_str + str(
                    self.current_new_person_id)
            else:
                # Compute prob --- derive prob from the distance
                this_id, this_label = self.evaluate_result(dist_label_list)
            # Regardless of the probability, always add the newest picture to self.nearest
            self.nearest.append((this_label, im_feature))
            log_file.write(
                '\t'.join(map(str, ['self.nearest :',
                                    map(str, self.nearest)])) + '\n')
            # Online clustering --- based on dist, decide whether to create a new person or add to an existing one
            if this_id == self.same_pic_id:
                need_add = False
            elif this_id == self.must_be_same_id:
                need_add = False
                need_save = True
                this_person_pic_folder = os.path.join(
                    self.all_pic_data_folder, this_label + self.must_same_str)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder,
                    this_label + self.must_same_str)
            elif this_id == self.must_be_not_same_id:
                this_label = self.new_person_str + str(
                    self.current_new_person_id)
                self.current_new_person_id += 1
                this_person_pic_folder = os.path.join(self.all_pic_data_folder,
                                                      this_label)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder, this_label)
                need_add = True
                need_save = True
            elif this_id == self.maybe_same_id:
                this_person_pic_folder = os.path.join(
                    self.all_pic_data_folder, this_label + self.maybe_same_str)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder,
                    this_label + self.maybe_same_str)
                need_add = False  # samples whose prob falls in the gray area are not added; all other cases are added
                need_save = True
            else:
                log_file.write('\t'.join(map(str, ['error para :', this_id])) +
                               '\n')
            if need_save:
                try:
                    if not os.path.exists(this_person_pic_folder):
                        os.makedirs(this_person_pic_folder)
                    if not os.path.exists(this_person_feature_folder):
                        os.makedirs(this_person_feature_folder)
                    # Store the picture's feature directly, and save the image file as well
                    this_pic_feature_name = os.path.join(
                        this_person_feature_folder, image_id + '.p')
                    msgpack_numpy.dump(im_feature,
                                       open(this_pic_feature_name, 'wb'))
                    this_pic_face_name = os.path.join(this_person_pic_folder,
                                                      image_id + '.jpg')
                    cv2.imwrite(this_pic_face_name, face_pic)
                except:
                    traceback.print_exc()
                    return self.unknown, 1.0, feature_str, False
            # Add to the LSH Forest --- partial_fit
            if need_add:
                self.add_one_pic(im_feature, this_label)
                # A file name can be generated from the label and image_id to decide whether to store the file [it can be stored on both the server and locally]
            if this_id == self.same_pic_id or this_id == self.must_be_not_same_id or this_id == self.must_be_same_id:
                end = time.time()
                log_file.write('\t'.join(
                    map(str, [
                        'stat recognize_time :', (end - start), 'this_id :',
                        self.trans_dic.get(this_id)
                    ])) + '\n')
                log_file.close()
                need_save = True
                return this_label.replace(self.must_same_str, ''), str(
                    dist_label_list[0][0]), str(feature_str), str(need_save)
            else:
                # Gray area, do not show the person's name
                end = time.time()
                log_file.write(
                    '\t'.join(map(str, ['stat gray_area :', (end - start)])) +
                    '\n')
                log_file.close()
                return self.unknown, str(
                    dist_label_list[0][0]), str(feature_str), str(False)
        except:
            traceback.print_exc()
            log_file.close()
            return self.unknown, str(100.0), str(feature_str), str(False)
コード例 #39
0
def save_data(extract_func, data_path):
    feature, label = extract_func()
    msgpack_numpy.dump((feature, label), open(data_path, 'wb'))
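
For reference, reading such a dump back is symmetric; a minimal sketch, assuming a file previously written by save_data (the file name is illustrative):

import msgpack_numpy

# Load the (feature, label) tuple written by save_data; the path is a placeholder.
feature, label = msgpack_numpy.load(open('feature_label.p', 'rb'))
print type(feature), type(label)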
コード例 #40
0
def main(args):

    with tf.Graph().as_default():

        with tf.Session() as sess:

            # Read the file containing the pairs used for testing
            pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))

            # Get the paths for the corresponding images
            paths, actual_issame = lfw.get_paths(
                os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)

            # Load the model
            print('Model directory: %s' % args.model_dir)
            meta_file, ckpt_file = facenet.get_model_filenames(
                os.path.expanduser(args.model_dir))
            print('Metagraph file: %s' % meta_file)
            print('Checkpoint file: %s' % ckpt_file)
            facenet.load_model(args.model_dir, meta_file, ckpt_file)

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")

            image_size = images_placeholder.get_shape()[1]
            embedding_size = embeddings.get_shape()[1]

            image_size = image_size.value
            # pdb.set_trace()

            # Run forward pass to calculate embeddings
            print('Running forward pass on LFW images')
            batch_size = args.lfw_batch_size
            nrof_images = len(paths)
            nrof_batches = int(math.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))

            print('nrof_batches :{}'.format(nrof_batches))

            all_time = 0

            for i in range(nrof_batches):
                start_index = i * batch_size
                end_index = min((i + 1) * batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                # pdb.set_trace()
                images = facenet.load_data(paths_batch, False, False,
                                           image_size)
                feed_dict = {images_placeholder: images}
                start = time()
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)
                end = time()
                all_time += (end - start)
                print('index: {}  time: {}'.format(i, (end - start)))
                # pdb.set_trace()
            print('all_time :', all_time)

            msgpack_numpy.dump((paths, emb_array, actual_issame),
                               open('lfw_feature.p', 'wb'))

            tpr, fpr, accuracy, val, val_std, far = lfw.evaluate(
                emb_array,
                args.seed,
                actual_issame,
                nrof_folds=args.lfw_nrof_folds)

            print('Accuracy: %1.3f+-%1.3f' %
                  (np.mean(accuracy), np.std(accuracy)))
コード例 #41
0
    def recognize_online_cluster(self, image, image_id):
        '''
            :param image: recognize the received picture and add it to the LSH Forest; compute proba from the distance (different distances map to different accuracies, and the threshold is derived from the existing dist values);
                            then compare against the preset thresholds to decide whether this is a newly seen person, a confirmed existing person, or an uncertain match to an existing person
            :return:
        '''
        start = time.time()
        need_add = False
        need_save = False
        current_day = get_current_day()
        log_file = open(os.path.join(log_dir, current_day+'.txt'), 'a')
        log_file.write('\t'.join(map(str, ["receive image", image_id, time.time()])) + '\n')
        feature_str = ''
        try:
            image = base64.decodestring(image)
            image = zlib.decompress(image)
            im = cv2.imdecode(np.fromstring(image, dtype=np.uint8), 1)
            log_file.write('\t'.join(map(str, ['shape :', im.shape[0], im.shape[1]])) + '\n')
            # Filter by image size
            if im.shape[0] < size_threshold or im.shape[1] < size_threshold:
                log_file.write('\t'.join(map(str, ['stat recognize_time :', (time.time() - start), 'small_size'])) + '\n')
                log_file.close()
                return self.unknown, 1.0, feature_str, need_save
            # Filter by sharpness (blur detection)
            blur_sign, blur_var = is_blur(cv2.resize(im, (96, 96)))
            if blur_sign:
                log_file.write('\t'.join(map(str, ['stat recognize_time :', (time.time() - start), 'blur_filter', blur_var])) + '\n')
                log_file.close()
                return self.unknown, 1.0, feature_str, need_save
            # Save the received image
            # img_file = '/tmp/research_face/%s.jpg' %image_id
            time_slot = get_time_slot(image_id)
            if time_slot is None:
                time_slot = 'error'
            time_slot_dir = os.path.join(tmp_face_dir, time_slot)
            if not os.path.exists(time_slot_dir):
                os.makedirs(time_slot_dir)
            img_file = os.path.join(time_slot_dir, image_id+'.jpg')
            cv2.imwrite(img_file, im)
        except:
            traceback.print_exc()
            log_file.close()
            return self.unknown, 1.0, feature_str, need_save
        try:
            # Pipeline: find the nearest pictures; compute prob; online clustering; add to the LSH Forest
            result = self.extract_pic_feature(img_file)
            if result is None:
                log_file.write('\t'.join(map(str, ['stat not_find_face', 'time :', (time.time() - start)]))+'\n')
                log_file.close()
                return self.unknown, 1.0, feature_str, need_save
            face_pic, im_feature = result

            try:
                # nearest_sim_list has the same format as dist_label_list, so the two lists can be merged and evaluated together (this avoids having to take the time factor into account)
                # After a person is recognized, the name and feature are appended to self.nearest
                nearest_sim_list = self.cal_nearest_sim(current_feature=im_feature)
            except:
                traceback.print_exc()
                nearest_sim_list = []
            log_file.write('\t'.join(map(str, ['nearest_sim_list :', map(str, nearest_sim_list)])) + '\n')
            feature_str = base64.b64encode(msgpack_numpy.dumps(im_feature))
            log_file.write('\t'.join(map(str, ['extract_feature_time :', (time.time() - start)]))+'\n')
            # Find the nearest pictures --- use the LSH Forest to find the 10 nearest pictures, then compute the distance to each

            tmp_list = self.find_k_neighbors_with_lsh(im_feature)
            nearest_sim_list.sort(key=lambda x: x[0], reverse=True)
            nearest_sim_list.extend(tmp_list)
            dist_label_list = nearest_sim_list[:]

            # Compute
            log_file.write('\t'.join(map(str, ['dist_label_list :', map(str, dist_label_list)])) + '\n')
            if not dist_label_list:  # no candidates found
                this_id = self.must_be_not_same_id
                this_label = self.new_person_str + str(self.current_new_person_id)
            else:
                # Compute prob --- derive the probability from the distances
                this_id, this_label = self.evaluate_result(dist_label_list)
            # Regardless of the probability, always append the latest picture to self.nearest
            self.nearest.append((this_label, im_feature))
            log_file.write('\t'.join(map(str, ['self.nearest :', map(str, self.nearest)])) + '\n')
            # Online clustering --- based on the distance, decide whether to create a new person or merge into an existing one
            if this_id == self.same_pic_id:
                need_add = False
            elif this_id == self.must_be_same_id:
                need_add = False
                need_save = True
                this_person_pic_folder = os.path.join(self.all_pic_data_folder, this_label+self.must_same_str)
                this_person_feature_folder = os.path.join(self.all_pic_feature_data_folder, this_label+self.must_same_str)
            elif this_id == self.must_be_not_same_id:
                this_label = self.new_person_str + str(self.current_new_person_id)
                self.current_new_person_id += 1
                this_person_pic_folder = os.path.join(self.all_pic_data_folder, this_label)
                this_person_feature_folder = os.path.join(self.all_pic_feature_data_folder, this_label)
                need_add = True
                need_save = True
            elif this_id == self.maybe_same_id:
                this_person_pic_folder = os.path.join(self.all_pic_data_folder, this_label + self.maybe_same_str)
                this_person_feature_folder = os.path.join(self.all_pic_feature_data_folder, this_label + self.maybe_same_str)
                need_add = False  # samples whose prob falls in the gray area are not added; all other cases are added
                need_save = True
            else:
                log_file.write('\t'.join(map(str, ['error para :', this_id]))+'\n')
            if need_save:
                try:
                    if not os.path.exists(this_person_pic_folder):
                        os.makedirs(this_person_pic_folder)
                    if not os.path.exists(this_person_feature_folder):
                        os.makedirs(this_person_feature_folder)
                    # Store the picture's feature directly, and save the image file as well
                    this_pic_feature_name = os.path.join(this_person_feature_folder, image_id+'.p')
                    msgpack_numpy.dump(im_feature, open(this_pic_feature_name, 'wb'))
                    this_pic_face_name = os.path.join(this_person_pic_folder, image_id+'.jpg')
                    cv2.imwrite(this_pic_face_name, face_pic)
                except:
                    traceback.print_exc()
                    return self.unknown, 1.0, feature_str, False
            # Add to the LSH Forest --- partial_fit
            if need_add:
                self.add_one_pic(im_feature, this_label)
                # A file name can be generated from the label and image_id to decide whether to store the file [it can be stored on both the server and locally]
            if this_id == self.same_pic_id or this_id == self.must_be_not_same_id or this_id == self.must_be_same_id:
                end = time.time()
                log_file.write('\t'.join(map(str, ['stat recognize_time :',(end - start), 'this_id :', self.trans_dic.get(this_id)]))+'\n')
                log_file.close()
                need_save = True
                return this_label.replace(self.must_same_str, ''), str(dist_label_list[0][0]), str(feature_str), str(need_save)
            else:
                # Gray area, do not show the person's name
                end = time.time()
                log_file.write('\t'.join(map(str, ['stat gray_area :',(end - start)]))+'\n')
                log_file.close()
                return self.unknown, str(dist_label_list[0][0]), str(feature_str), str(False)
        except:
            traceback.print_exc()
            log_file.close()
            return self.unknown, str(100.0), str(feature_str), str(False)
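
The method above expects image to be a zlib-compressed, base64-encoded JPEG byte string (it reverses exactly those two steps before cv2.imdecode). A minimal client-side sketch for preparing that payload; the server object and the image path are illustrative:

import base64
import zlib
import cv2

def encode_image_for_recognition(img_path):
    # Mirror the decoding in recognize_online_cluster: JPEG-encode, zlib-compress, then base64-encode.
    im = cv2.imread(img_path)
    ret, buf = cv2.imencode('.jpg', im)
    return base64.encodestring(zlib.compress(buf.tostring()))

# 'server' stands for a hypothetical instance of the class that defines recognize_online_cluster:
# label, dist, feature_str, need_save = server.recognize_online_cluster(
#     encode_image_for_recognition('query.jpg'), image_id='1478000000_cam1')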
コード例 #43
0
def stat(folder):
    # Count how many persons have at least 10 pictures and how many pictures that covers.
    pic_num_list = []
    current_person_index = 0
    all_num = 0
    for root_dir, dir_list, pic_list in os.walk(folder):
        if len(pic_list) >= 10:
            pic_num_list.append((root_dir, len(pic_list)))
            all_num += len(pic_list)
            print len(pic_num_list), all_num
            current_person_index += 1


if __name__ == '__main__':
    pass
    # stat(folder='/data/liubo/face/baihe/person_mtcnn_160')
    #
    #

    new_sample_list = create_sample_list('/data/liubo/face/baihe/person_dlib_face')
    train_sample_list, valid_sample_list = train_test_split(new_sample_list, test_size=0.1)
    print len(train_sample_list), len(valid_sample_list)
    msgpack_numpy.dump((train_sample_list, valid_sample_list),
                       open('/data/liubo/face/baihe/person_dlib_face_sample_list_30.p', 'wb'))


    # train_sample_list, valid_sample_list = create_train_valid_list('/data/liubo/face/baihe/baihe_person_face_align')
    # msgpack_numpy.dump((train_sample_list, valid_sample_list),
    #                    open('/data/liubo/face/baihe/face_align_train_valid_sample_list_filter.p', 'wb'))





コード例 #44
0
    query_list = shanghai_1130_query_keyword_dic.keys()
    all_dist = []
    start1 = time()

    # # Dump the data for offline computation on Hadoop
    # new_dic = {}
    # for query in query_list:
    #     query_search_list = shanghai_1130_query_keyword_dic.get(query)
    #     keyword_list_vector, keyword_list_freq = create_wmd_data(query_search_list, word2vec_model)
    #     new_dic[query[0]] = (query_search_list, keyword_list_vector, keyword_list_freq)
    # cPickle.dump(new_dic, open('beijing_1130_query_keyword_dic.p', 'wb'))

    # Compute directly
    query_dist_dic = {}
    for index1, query1 in enumerate(query_list):
        for index2, query2 in enumerate(query_list):
            if index2 > index1:
                keyword_list1 = shanghai_1130_query_keyword_dic[query_list[index1]]
                keyword_list2 = shanghai_1130_query_keyword_dic[query_list[index2]]
                query_distance = cal_query_distance(keyword_list1, keyword_list2, word2vec_model)
                all_dist.append(query_distance)
                query_dist_dic[(query_list[index1][0], query_list[index2][0])] = query_distance
                # print index1, index2, query_distance
        print index1, (time() - start1)
        start1 = time()
    print len(all_dist)
    msgpack_numpy.dump((query_list, all_dist), open('/data/liubo/hotspot/query_search/all_query_dist_beijing_{}.p'.format(day), 'wb'))
    cPickle.dump(query_dist_dic, open('/data/liubo/hotspot/query_search/beijing_query_dist_dic_{}.p'.format(day), 'wb'))
    end = time()
    print 'time :', (end - start)
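
cal_query_distance is not shown in this example; one plausible implementation, assuming each keyword list holds whitespace-separated keyword strings and word2vec_model is a gensim word2vec model exposing wmdistance (Word Mover's Distance), is sketched below:

def cal_query_distance(keyword_list1, keyword_list2, word2vec_model):
    # Hypothetical sketch: average the WMD over all cross pairs of keywords.
    dist_sum, pair_num = 0.0, 0
    for kw1 in keyword_list1:
        for kw2 in keyword_list2:
            dist_sum += word2vec_model.wmdistance(kw1.split(), kw2.split())
            pair_num += 1
    return dist_sum / pair_num if pair_num else 0.0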
コード例 #45
0
 def cb(output):
     action, value = output.result()
     self.send_queue.put([ident, dump(action)])
     self.client[ident].append(
         TransitionExperience(frame, action, None, value))
コード例 #46
0
def train_valid_verif_model():
    all_data = []
    all_label = []
    all_pic_path_list = []
    count = 0
    path_feature_dic = msgpack.load(open('research_feature.p', 'rb'))
    not_in = 0
    not_in_pair = {}
    for line in open(pair_file):
        if count % 100 == 0:
            print count
        count += 1
        tmp = line.rstrip().split()
        if len(tmp) == 3:
            path1 = tmp[0]
            path2 = tmp[1]
            label = int(tmp[2])
            if path1 in path_feature_dic and path2 in path_feature_dic:
                try:
                    feature1 = np.asarray(path_feature_dic.get(path1))
                    feature2 = np.asarray(path_feature_dic.get(path2))
                    if len(feature1) < 100 or len(feature2) < 100:
                        print path1, path2
                        not_in += 1
                        not_in_pair[(path1, path2)] = 1
                        continue
                    feature1 = np.reshape(feature1, newshape=(1, feature1.shape[0]))
                    feature2 = np.reshape(feature2, newshape=(1, feature2.shape[0]))
                    predicts = pw.cosine_similarity(feature1, feature2)
                    all_data.append(predicts)
                    all_label.append(label)
                    all_pic_path_list.append((path1, path2))
                except:
                    traceback.print_exc()
                    # pdb.set_trace()
            else:
                # the pair's features are missing; there is no active exception to print here
                not_in += 1
                not_in_pair[(path1, path2)] = 1
                # pdb.set_trace()
    msgpack_numpy.dump((all_data, all_label, all_pic_path_list), open(feature_pack_file, 'wb'))

    (all_data, all_label, all_pic_path_list) = msgpack_numpy.load(open(feature_pack_file, 'rb'))
    all_data = np.asarray(all_data)
    all_data = np.reshape(all_data, newshape=(all_data.shape[0], all_data.shape[2]))
    all_label = np.asarray(all_label)
    print all_data.shape, all_label.shape


    kf = KFold(len(all_label), n_folds=10)
    all_acc = []
    for (train, valid) in kf:
        train_data = all_data[train]
        valid_data = all_data[valid]
        train_label = all_label[train]
        valid_label = all_label[valid]

        clf = LinearSVC()
        clf.fit(train_data, train_label)
        acc = accuracy_score(valid_label, clf.predict(valid_data))
        roc_auc = roc_auc_score(valid_label, clf.predict(valid_data))
        all_acc.append(acc)
        print acc, roc_auc
    print 'mean_acc :', np.mean(all_acc)
    clf = LinearSVC()
    clf.fit(all_data, all_label)
    cPickle.dump(clf, open(verification_model_file, 'wb'))
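
A short sketch of how the saved verification model might be applied to a new pair of features (the helper itself is illustrative; verification_model_file is the file written above):

import cPickle
import numpy as np
from sklearn.metrics import pairwise as pw

def verify_pair(feature1, feature2, model_file):
    # Load the LinearSVC trained above and classify the cosine similarity of the pair.
    clf = cPickle.load(open(model_file, 'rb'))
    feature1 = np.reshape(np.asarray(feature1), (1, -1))
    feature2 = np.reshape(np.asarray(feature2), (1, -1))
    sim = pw.cosine_similarity(feature1, feature2)  # shape (1, 1), same form as the training data
    return clf.predict(sim)[0]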
コード例 #47
0
def find_max_min():
    # Find the least similar pairs within the same person, and the most similar pairs across different people
    lfw_feature_dic = msgpack_numpy.load(open(feature_pack_file, 'rb'))
    person_list = lfw_feature_dic.keys()
    same_person_score = []
    same_person_score_pair_dic = {}   # {score:[(path1,path2), ...,(path1,path2)]}
    no_same_person_score = []
    no_same_person_score_pair_dic = {}   # {score:[(path1,path2), ...,(path1,path2)]}
    heapq.heapify(same_person_score)
    pair_threshold = 3000

    for person_index, person in enumerate(person_list):
        start = time()
        path_feature_list = lfw_feature_dic.get(person)
        # Enumerate all possible pairs for this person --- the smaller the score the better (the least similar photos of the same person)
        # Each time the largest score is removed and a smaller one is added, so scores are negated when pushed; the heap top then corresponds to the current maximum score
        length = len(path_feature_list)
        for index_i in range(length):
            for index_j in range(index_i + 1, length):  # skip pairing a picture with itself
                feature1, path1 = path_feature_list[index_i]
                feature2, path2 = path_feature_list[index_j]
                feature1 = np.reshape(feature1, newshape=(1, feature1.shape[0]))
                feature2 = np.reshape(feature2, newshape=(1, feature2.shape[0]))
                this_score = 0 - pw.cosine_similarity(feature1, feature2)[0][0]
                if len(same_person_score) > pair_threshold:
                    top_item = same_person_score[0]
                    if this_score < top_item:    # even less similar, keep it
                        heapq.heappop(same_person_score)
                        heapq.heappush(same_person_score, this_score)
                        # Remove the old pair and add the current one (the same score may correspond to multiple pairs)
                        if top_item in same_person_score_pair_dic:
                            same_person_score_pair_dic.pop(top_item)
                        pair_list = same_person_score_pair_dic.get(this_score, [])
                        pair_list.append((path1, path2))
                        same_person_score_pair_dic[this_score] = pair_list
                else:
                    heapq.heappush(same_person_score, this_score)
                    pair_list = same_person_score_pair_dic.get(this_score, [])
                    pair_list.append((path1, path2))
                    same_person_score_pair_dic[this_score] = pair_list

        # Enumerate all possible dissimilar (cross-person) pairs

        for other_person_index, other_person in enumerate(person_list[person_index+1:], start=person_index+1):
            other_path_feature_list = lfw_feature_dic.get(other_person)
            if other_person == person:
                continue
            other_length = len(other_path_feature_list)
            for index_i in range(length):
                for index_j in range(other_length):
                    feature1, path1 = path_feature_list[index_i]
                    feature2, path2 = other_path_feature_list[index_j]
                    feature1 = np.reshape(feature1, newshape=(1, feature1.shape[0]))
                    feature2 = np.reshape(feature2, newshape=(1, feature2.shape[0]))
                    this_score = pw.cosine_similarity(feature1, feature2)[0][0]
                    if len(no_same_person_score) > pair_threshold:
                        top_item = no_same_person_score[0]
                        if this_score < top_item:    # more similar, keep it
                            heapq.heappop(no_same_person_score)
                            heapq.heappush(no_same_person_score, this_score)
                            # Remove the old pair and add the current one (the same score may correspond to multiple pairs)
                            if top_item in no_same_person_score_pair_dic:
                                no_same_person_score_pair_dic.pop(top_item)
                            pair_list = no_same_person_score_pair_dic.get(this_score, [])
                            pair_list.append((path1, path2))
                            no_same_person_score_pair_dic[this_score] = pair_list
                    else:
                        heapq.heappush(no_same_person_score, this_score)
                        pair_list = no_same_person_score_pair_dic.get(this_score, [])
                        pair_list.append((path1, path2))
                        no_same_person_score_pair_dic[this_score] = pair_list
        end = time()
        print person_index, person, (end - start), length
    msgpack_numpy.dump((same_person_score_pair_dic, same_person_score, no_same_person_score_pair_dic, no_same_person_score),
                       open(new_pair_pack_file, 'wb'))
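
The bounded selection above relies on a standard heapq pattern: scores are negated before being pushed onto the min-heap, so the heap top always holds the candidate to evict once the heap is full. The same pattern in isolation, as a minimal sketch with illustrative names:

import heapq

def keep_k_least_similar(similarities, k):
    # Keep the k smallest similarities; push negated values so heap[0] tracks the largest kept similarity.
    heap = []
    for sim in similarities:
        neg = 0 - sim
        if len(heap) < k:
            heapq.heappush(heap, neg)
        elif neg > heap[0]:  # sim is smaller than the largest similarity currently kept
            heapq.heapreplace(heap, neg)
    return sorted(0 - x for x in heap)

print keep_k_least_similar([0.9, 0.1, 0.5, 0.3, 0.8], 3)  # -> [0.1, 0.3, 0.5]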