Example #1
# Assumed imports: this snippet targets pyAudioAnalysis; the exact module and
# keyword names (e.g. mt_size/st_step/plot vs. mid_window/short_window/plot_res)
# vary between releases, so the calls below may need renaming for your version.
import numpy as np
from scipy.io import wavfile
from pyAudioAnalysis import audioBasicIO as audio_basic_io
from pyAudioAnalysis import audioSegmentation as aS


def speaker_diarization():
    file = '/home/daiab/machine_disk/data/voice_identity/dianxin/1.wav'
    use_LDA = False
    plot = True
    num_speaker = 2
    if use_LDA:
        pos, cls = aS.speaker_diarization(file,
                                          num_speaker,
                                          mt_size=4.0,
                                          mt_step=0.1,
                                          st_win=0.05,
                                          st_step=0.01,
                                          plot=plot)
    else:
        pos, cls = aS.speaker_diarization(file, num_speaker, lda_dim=0, plot=plot)
    fr, x = audio_basic_io.read_audio_file(file)

    # Gather the raw samples that belong to each of the two speakers.
    sep_voice = [[], []]
    pre_pos = 0
    # Trim a small margin (0.01% of the signal length) at every segment
    # boundary so neighbouring speakers do not bleed into each other.
    cut_num = int(x.shape[0] * 0.0001)
    print('cut_num', cut_num)
    for i, c in enumerate(cls):
        c = int(c)
        v_from = pre_pos
        v_to = int(pos[i] * fr)  # segment end: seconds -> sample index
        sep_voice[c] += x[v_from + cut_num: v_to - cut_num].tolist()
        pre_pos = v_to

    print(len(sep_voice[0]), len(sep_voice[1]))
    # Write one wav file per speaker.
    wavfile.write('./0.wav', fr, np.array(sep_voice[0], dtype=np.int16))
    wavfile.write('./1.wav', fr, np.array(sep_voice[1], dtype=np.int16))
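The function above is self-contained apart from its hard-coded input path; pointing file at a real two-speaker recording and running it writes ./0.wav and ./1.wav, one per diarized speaker. A minimal driver, assuming the imports listed with the example (the __main__ guard is an addition, not part of the original snippet):

if __name__ == '__main__':
    # Edit the hard-coded `file` path inside speaker_diarization() before running.
    speaker_diarization()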
Example #2
from pyAudioAnalysis import audioSegmentation as aS


def speakerDiarizationWrapper(inputFile, numSpeakers, useLDA):
    # lda_dim=0 disables the LDA step; otherwise the library default is used.
    if useLDA:
        aS.speaker_diarization(inputFile, numSpeakers, plot_res=True)
    else:
        aS.speaker_diarization(inputFile,
                               numSpeakers,
                               lda_dim=0,
                               plot_res=True)
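A minimal usage sketch for the wrapper above; the path and speaker count are placeholders, not values from the original example, and pyAudioAnalysis must be importable as aS as shown in the snippet.

# Hypothetical call: diarize a two-speaker interview and let the library
# display its result plot (plot_res=True inside the wrapper).
speakerDiarizationWrapper('/path/to/interview.wav', numSpeakers=2, useLDA=False)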
Example #3
# Assumed imports; main_file is a project-specific helper defined elsewhere,
# and the speaker_diarization import assumes a pyAudioAnalysis version whose
# function returns (timestamps, classes).
import pandas as pd
from pydub import AudioSegment
from pyAudioAnalysis.audioSegmentation import speaker_diarization


def dia(filename, speakers, lda_dim=0):
    global labels
    # Truncate any previous results file, then run the project's own
    # preprocessing step on the whole recording.
    open('Speaker_Diarization.txt', 'w').close()
    main_file(filename, overall=True)
    timestamp, classes = speaker_diarization(filename=filename,
                                             n_speakers=speakers,
                                             lda_dim=lda_dim)
    # Dump one "timestamp,class" row per analysis window.
    file = open('Speaker_Diarization.txt', 'a')
    file.write('Timestamp,Classes\n')
    for i in range(len(timestamp)):
        file.write(f'{timestamp[i]},{int(classes[i])}\n')

    file.close()
    df = pd.read_csv('Speaker_Diarization.txt')
    labels = df['Classes'].unique()
    # Group consecutive rows with the same class into [class, start, end] segments.
    sda = dict()
    a = []
    segments = []
    for i in range(df.shape[0]):
        if i == 0:
            a.append(df['Classes'][i])
            a.append(df['Timestamp'][i])
        elif df['Classes'][i - 1] == df['Classes'][i]:
            a.append(df['Timestamp'][i])
        else:
            segments.append([a[0], a[1], a[-1]])
            a = []
            a.append(df['Classes'][i])
            a.append(df['Timestamp'][i])
    # Flush the final run of labels, which the loop above never appends.
    if a:
        segments.append([a[0], a[1], a[-1]])

    # Collect the (start, end) pairs of every segment for each speaker label.
    p = []
    for label in labels:
        for segment in segments:
            if segment[0] == label:
                p.append([segment[1], segment[2]])

        sda[label] = p
        p = []

    speech = AudioSegment.from_wav(filename)
    for key, value in sda.items():
        # Concatenate this speaker's segments (timestamps are in seconds,
        # pydub slices in milliseconds).
        speaker = 0
        for parts in value:
            speaker += speech[parts[0] * 1000:parts[1] * 1000]

        file = filename.split('.')[0] + '_speaker_' + str(key) + '.wav'
        print(key, value)
        if len(value) == 0:
            # No audio for this label: drop it from the global label list.
            labels = list(labels)
            labels.remove(key)
        else:
            speaker.export(file, format='wav')
            print('Files Exported')
            main_file(file=file, folder='uploads', labels=labels)
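A minimal sketch of how dia might be called; the file name and speaker count are assumptions, and the project-specific main_file helper must be defined for the call to run end to end.

# Hypothetical call: split 'meeting.wav' into per-speaker files such as
# 'meeting_speaker_0.wav' and 'meeting_speaker_1.wav'.
dia('meeting.wav', speakers=2, lda_dim=0)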