def iden(testfile, fa_data_dir, iden_model, max_sec, step_sec, frame_step):
    """Run speaker identification over a test list and print/return accuracy.

    Args:
        testfile: CSV file whose rows are ``relative_wav_path,label``.
        fa_data_dir: root directory prepended to each wav path.
        iden_model: path of the Keras model file to load.
        max_sec, step_sec, frame_step: bucket parameters forwarded to
            ``build_buckets`` (project helper).

    Returns:
        float: identification accuracy in [0, 1] (also printed).
    """
    # Read test data and labels.
    print("Use {} for test".format(testfile))

    iden_list = np.loadtxt(testfile, str, delimiter=",")

    labels = np.array([int(row[1]) for row in iden_list])
    voice_list = np.array([os.path.join(fa_data_dir, row[0]) for row in iden_list])

    # Load model (fixed typo: "form" -> "from").
    print("Load model from {}".format(iden_model))
    model = load_model(iden_model)

    print("Start identifying...")
    total_length = len(voice_list)
    p_labels = []  # removed unused `res` local
    # Buckets are loop-invariant: compute them once.
    buckets = build_buckets(max_sec, step_sec, frame_step)
    for idx, wav_path in enumerate(voice_list):
        if idx % 1000 == 0:
            print('Finish identifying for {}/{}th wav.'.format(idx, total_length))
        specs = get_fft_spectrum(wav_path, buckets)
        # Model expects a (1, H, W, 1) batch.
        scores = np.squeeze(model.predict(specs.reshape(1, *specs.shape, 1)))
        p_labels.append(np.argmax(scores))

    # Accuracy = fraction of predictions matching the ground-truth labels
    # (replaces the non-idiomatic `sum(compare == True)`).
    acc = np.count_nonzero(labels == np.array(p_labels)) / total_length
    print(acc)
    return acc
# Example #2
# 0
def get_fft_features_from_list_file(list_file, max_sec):
    """Load FFT spectra and integer speaker labels from a CSV list file.

    Only clips whose spectrogram is exactly ``c.MAX_SEC * 100`` frames wide
    (i.e. full-length clips at 100 frames/sec) are kept.

    Args:
        list_file: CSV with at least ``filename`` and ``speaker`` columns.
        max_sec: maximum clip length, forwarded to ``build_buckets``.

    Returns:
        (dataX, dataY): stacked spectrogram array with a trailing channel
        dim, and the matching integer label array.
    """
    buckets = build_buckets(max_sec, c.BUCKET_STEP, c.FRAME_STEP)
    result = pd.read_csv(list_file, delimiter=",")
    result['features'] = result['filename'].apply(
        lambda x: get_fft_spectrum(x, buckets))

    dataX = []
    dataY = []

    for _, row in result.iterrows():
        x = row['features']
        y = row['speaker']
        if isinstance(y, str):
            # Strip the speaker-ID prefix (e.g. "id") and parse the rest.
            y = int(y.replace(c.SPEAKER_PREFIX, ''), 10)

        values = x.reshape(*x.shape, 1)
        if values.shape[1] == c.MAX_SEC * 100:  # keep only full-length clips
            dataX.append(values)
            # BUG FIX: append the label only when the sample is kept.
            # The original appended y unconditionally, so X and Y went out
            # of sync whenever a clip was filtered out by the shape check.
            dataY.append(y)

    dataX = np.stack(dataX, axis=0)
    dataY = np.asarray(dataY)
    print("X.shape: {}, Y.shape{}".format(dataX.shape, dataY.shape))

    return dataX, dataY
# Example #3
# 0
def score(testfile, fa_data_dir, test_model_path, max_sec, step_sec,
          frame_step, metric):
    """Speaker-verification scoring: embed every unique wav, score each trial
    pair by inner product of embeddings, and print the EER.

    Args:
        testfile: whitespace-separated trial list: ``label path1 path2``.
        fa_data_dir: root directory prepended to each wav path.
        test_model_path: path of the Keras model file to load.
        max_sec, step_sec, frame_step: forwarded to ``build_buckets``.
        metric: unused here; kept for signature compatibility.
    """
    print("Use {} for test".format(testfile))

    verify_list = np.loadtxt(testfile, str)

    verify_lb = np.array([int(i[0]) for i in verify_list])
    list1 = np.array([os.path.join(fa_data_dir, i[1]) for i in verify_list])
    list2 = np.array([os.path.join(fa_data_dir, i[2]) for i in verify_list])

    total_list = np.concatenate((list1, list2))
    unique_list = np.unique(total_list)

    # Load model and truncate at layer 34 -- the fc7 output (1024-dim).
    # (fixed typo: "form" -> "from")
    print("Load model from {}".format(test_model_path))
    model = load_model(test_model_path)
    model = Model(inputs=model.layers[0].input,
                  outputs=model.layers[34].output)

    print("Start testing...")
    total_length = len(unique_list)
    feats, scores, labels = [], [], []
    buckets = build_buckets(max_sec, step_sec, frame_step)
    for idx, ID in enumerate(unique_list):
        if idx % 50 == 0:
            print('Finish extracting features for {}/{}th wav.'.format(
                idx, total_length))
        specs = get_fft_spectrum(ID, buckets)
        v = model.predict(specs.reshape(1, *specs.shape, 1))
        feats += [v]

    feats = np.array(feats)

    # PERF FIX: build a path -> row-index map once, instead of an O(n)
    # np.where scan per trial pair (entries of unique_list are unique).
    index_of = {path: i for i, path in enumerate(unique_list)}

    # Score each pair with the inner product of its two embeddings.
    for idx, (p1, p2) in enumerate(zip(list1, list2)):
        v1 = feats[index_of[p1], 0, 0]
        v2 = feats[index_of[p2], 0, 0]

        scores += [np.sum(v1 * v2)]
        labels += [verify_lb[idx]]

    scores = np.array(scores)
    labels = np.array(labels)

    eer = calculate_eer(labels, scores)
    print("EER: {}".format(eer))
# Example #4
# 0
def get_embeddings_from_list_file(model, list_file, max_sec):
    """Return a DataFrame with filename, speaker, and model embedding for
    every wav listed in *list_file*."""
    buckets = build_buckets(max_sec, c.BUCKET_STEP, c.FRAME_STEP)
    table = pd.read_csv(list_file, delimiter=",")

    def spectrum(path):
        # FFT features for one wav, sized by the bucket table.
        return get_fft_spectrum(path, buckets)

    def embed(spec):
        # Model input is (1, H, W, 1); squeeze the singleton dims back out.
        return np.squeeze(model.predict(spec.reshape(1, *spec.shape, 1)))

    table['features'] = table['filename'].apply(spectrum)
    table['embedding'] = table['features'].apply(embed)
    return table[['filename', 'speaker', 'embedding']]
 def get_ids_random_feature(self, Id):
     """Return the FFT spectrum of one randomly chosen utterance of speaker *Id*.

     Picks a random speech session under the speaker's directory, then a
     random utterance file inside that session.
     """
     speaker_dir = os.path.join(self.directory, Id)
     session_dir = os.path.join(
         speaker_dir, random.choice(os.listdir(speaker_dir)))
     utterance_path = os.path.join(
         session_dir, random.choice(os.listdir(session_dir)))
     return get_fft_spectrum(utterance_path, self.buckets)
# Example #6
# 0
def get_train_list(path):
    """Read a training CSV and attach preprocessed spectra and one-hot labels.

    Adds a "voice" column (FFT spectrum per file, paths relative to
    ``c.FA_DIR``) and a "lable" column (one-hot encoded speaker).
    """
    buckets = build_buckets(c.MAX_SEC, c.BUCKET_STEP, c.FRAME_STEP)
    table = pd.read_csv(path, delimiter=",")
    print("Preprocessing voice data...")
    table["voice"] = table["filename"].apply(
        lambda rel: get_fft_spectrum(c.FA_DIR + rel, buckets))
    # NB: column key "lable" (sic) is kept byte-for-byte -- downstream code
    # reads this exact key. Speakers are 1-based, hence the -1 shift.
    table["lable"] = table["speaker"].apply(lambda spk: to_one_hot(spk - 1))
    return table
# Example #7
# 0
def get_embeddings_from_file(model, file_path, max_sec):
    """Compute the model embedding of a single wav and wrap it in a
    one-row DataFrame with 'filename' and 'embedding' columns."""
    buckets = build_buckets(max_sec, c.BUCKET_STEP, c.FRAME_STEP)
    frame = pd.DataFrame({'filename': [file_path]})
    frame['features'] = frame['filename'].apply(
        lambda p: get_fft_spectrum(p, buckets))
    # Model input is (1, H, W, 1); squeeze collapses the singleton dims.
    frame['embedding'] = frame['features'].apply(
        lambda spec: np.squeeze(model.predict(spec.reshape(1, *spec.shape, 1))))
    return frame[['filename', 'embedding']]
# Example #8
# 0
def get_embeddings_from_list_file(model, list_file, max_sec):
    """For each wav listed in *list_file*, compute its FFT spectrum and the
    model's squeezed embedding; return filename/speaker/embedding columns."""
    buckets = build_buckets(max_sec, c.BUCKET_STEP, c.FRAME_STEP)
    frame = pd.read_csv(list_file, delimiter=",")
    # One FFT spectrum per listed filename, sized by the bucket table.
    frame['features'] = frame['filename'].map(
        lambda p: get_fft_spectrum(p, buckets))
    # np.squeeze removes all singleton dims from the (1, ..., 1) prediction.
    frame['embedding'] = frame['features'].map(
        lambda spec: np.squeeze(model.predict(spec.reshape(1, *spec.shape, 1))))
    return frame[['filename', 'speaker', 'embedding']]
    def get_ids_features(self, Id, count=np.iinfo(np.int32).max):
        """Collect up to *count* FFT spectra for speaker *Id*.

        Iterates the speaker's files (project helper ``__file_iterator__``)
        and stops once *count* spectra have been gathered; the default is
        effectively "all files".
        """
        collected = []
        for wav_path in self.__file_iterator__(Id):
            if len(collected) == count:
                break
            collected.append(get_fft_spectrum(wav_path, self.buckets))
        return collected
def get_embeddings_from_list_file(model, list_file, max_sec):
    """Embed every wav listed in *list_file* (paths relative to ``c.FA_DIR``)
    and return filename/speaker/embedding columns."""
    buckets = build_buckets(max_sec, c.BUCKET_STEP, c.FRAME_STEP)
    rows = pd.read_csv(list_file, delimiter=",")
    # Per-utterance feature matrix; filenames are relative to c.FA_DIR.
    rows['features'] = rows['filename'].apply(
        lambda rel: get_fft_spectrum(c.FA_DIR + rel, buckets))
    # Network input is (1, H, W, 1) per spectrum; squeeze the batch dims away.
    rows['embedding'] = rows['features'].apply(
        lambda feat: np.squeeze(model.predict(feat.reshape(1, *feat.shape, 1))))
    return rows[['filename', 'speaker', 'embedding']]
# Example #11
# 0
def load_wave(wave_file):
    """Load a wav as an FFT spectrogram fixed to exactly 300 frames wide,
    with a trailing channel dimension added.

    NOTE(review): if the spectrogram is narrower than 300 frames,
    np.random.randint raises ValueError -- behavior preserved from the
    original; confirm upstream guarantees width >= 300.
    """
    buckets = build_buckets(10, 1, 0.01)
    data = get_fft_spectrum(wave_file, buckets)

    width = data.shape[1]
    if width != 300:
        # Crop a random 300-frame window (assumes width > 300).
        offset = np.random.randint(0, width - 300)
        data = data[:, offset:offset + 300]

    return np.expand_dims(data, -1)
# Example #12
# 0
def iden(testfile, fa_data_dir, iden_model, max_sec, step_sec, frame_step, dim,
         batch_size, n_classes, epoch):
    """Evaluate a torch-serialized identification model on a test list and
    log loss/accuracy to visdom.

    Args:
        testfile: CSV whose rows are ``relative_wav_path,label``.
        fa_data_dir: root directory prepended to each wav path.
        iden_model: path of the torch-serialized model to load.
        max_sec, step_sec, frame_step: forwarded to ``build_buckets``.
        dim, batch_size: unused here; kept for signature compatibility.
        n_classes: number of speaker classes the model predicts.
        epoch: x-coordinate for the visdom plots.
    """
    # Read test data and labels.
    print("Use {} for test".format(testfile))

    iden_list = np.loadtxt(testfile, str, delimiter=",")

    voice_list = np.array([os.path.join(fa_data_dir, i[0]) for i in iden_list])
    total_length = len(voice_list)
    device = torch.device('cuda')
    labels = torch.tensor([int(i[1]) for i in iden_list]).to(device)
    criterion = nn.CrossEntropyLoss()

    # Load model (fixed typo: "form" -> "from").
    print("Load model from {}".format(iden_model))
    model = torch.load(iden_model).to(device)

    print("Start identifying...")

    # PERF FIX: buckets are loop-invariant; the original rebuilt them for
    # every wav inside the loop.
    buckets = build_buckets(max_sec, step_sec, frame_step)

    acc = 0
    loss = 0
    model.eval()
    for num, ID in enumerate(voice_list):
        if num % 100 == 0:
            print('Finish identifying for {}/{}th wav.'.format(
                num, total_length))
        spec = get_fft_spectrum(ID, buckets, mode="test")
        b_data = torch.tensor(spec.tolist()).to(device)
        # Add the channel dim (redundant second .to(device) removed).
        b_data = b_data.unsqueeze(0)
        with torch.no_grad():
            eval_predict, tmp_eval_accuracy = model(b_data.unsqueeze(0),
                                                    labels[num].unsqueeze(0),
                                                    1, n_classes)
        tmp_eval_loss = criterion(eval_predict,
                                  labels[num].unsqueeze(0).long()).to(device)
        loss += tmp_eval_loss.item()
        acc += tmp_eval_accuracy

    loss /= total_length
    acc /= total_length
    viz.line(torch.tensor([loss]), [epoch],
             update='append',
             win='test_loss',
             opts={'title': 'test_loss'})
    viz.line(torch.tensor([acc]), [epoch + 1],
             update='append',
             win='test_acc',
             opts={'title': 'test_acc'})

    print("eval_loss:", loss)
    print("eval_acc:", acc)
    def _gene_Data(self, list_IDs_temp, indexes):
        """Build one training batch: spectrogram array and one-hot labels.

        Args:
            list_IDs_temp: wav paths for this batch (len == batch_size).
            indexes: positions into self.labels matching list_IDs_temp.

        Returns:
            (b_data, b_labels): batch of shape (batch_size, *dim) and one-hot
            labels reshaped to (batch_size, 1, 1, n_classes).
        """
        b_data = np.empty((self.batch_size,) + self.dim)
        b_labels = np.empty((self.batch_size,), dtype=int)

        for slot, wav_id in enumerate(list_IDs_temp):
            # Spectrogram fills the single channel plane of this batch slot.
            b_data[slot, :, :, 0] = get_fft_spectrum(wav_id, self.buckets)
            b_labels[slot] = self.labels[indexes[slot]]  # labels are 0..n-1

        one_hot = keras.utils.to_categorical(
            b_labels, num_classes=self.n_classes)
        return b_data, one_hot.reshape(self.batch_size, 1, 1, self.n_classes)
# Example #14
# 0
def get_np_list(file_list, buckets):
    """Concatenate the FFT spectra of every file in *file_list* into one array.

    BUG FIX: the original was non-functional -- ``np.empty()`` with no shape
    raises TypeError, the loop concatenated ``c.FA_DIR + pt`` where ``pt`` is
    an int index (TypeError), and the ``np.concatenate`` result was discarded
    so ``voice`` was never populated.

    Args:
        file_list: iterable of wav filenames relative to ``c.FA_DIR``.
        buckets: bucket spec forwarded to ``get_fft_spectrum``.

    Returns:
        np.ndarray: spectra of all files concatenated along axis 0
        (shape is also printed, matching the original's intent).
    """
    voice = np.concatenate(
        [get_fft_spectrum(c.FA_DIR + name, buckets) for name in file_list])
    print(voice.shape)
    return voice
# Example #15
# 0
def get_embedding(model, wav_file, max_sec):
    """Compute the model embedding of a single wav file."""
    buckets = build_buckets(max_sec, c.BUCKET_STEP, c.FRAME_STEP)
    spectrum = get_fft_spectrum(wav_file, buckets)
    batch = spectrum.reshape(1, *spectrum.shape, 1)  # add batch + channel dims
    return np.squeeze(model.predict(batch))
# Example #16
# 0
def give_vggvox_input(input_file_path):
    """Read a CSV list file and return a Series of FFT spectra, one per
    listed wav filename."""
    buckets = build_buckets(c.MAX_SEC, c.BUCKET_STEP, c.FRAME_STEP)
    # Renamed local: original shadowed the `file` builtin.
    listing = pd.read_csv(input_file_path, delimiter=",")
    return listing['filename'].apply(
        lambda name: get_fft_spectrum(name, buckets))
# Example #17
# 0
def give_vggvox_input_simple(input_file_path):
    """Return the FFT spectrum of a single wav using the default (c.*)
    bucket settings."""
    return get_fft_spectrum(
        input_file_path,
        build_buckets(c.MAX_SEC, c.BUCKET_STEP, c.FRAME_STEP))