Ejemplo n.º 1
0
def compute_wav_path(wav, feat_scp, feat_ark, utt2dur, utt2num_frames):
    """Extract a spectrogram for one utterance and append it to the Kaldi-style outputs.

    Args:
        wav: Indexable pair; ``wav[0]`` is used as the utterance key and
            ``wav[1]`` is handed to ``Make_Spect`` as ``wav_path``.
        feat_scp: Writable text handle receiving one ``key ark_name:offset`` line.
        feat_ark: Writable binary handle (must expose ``name`` and ``tell()``)
            that receives the matrix through ``kaldi_io.write_mat``.
        utt2dur: Writable text handle receiving one ``key duration`` line.
        utt2num_frames: Writable text handle receiving one ``key frame_count`` line.
    """
    spect, seconds = Make_Spect(wav_path=wav[1],
                                windowsize=0.02,
                                stride=0.01,
                                duration=True)
    utt_id = wav[0]

    # Size of the raw matrix payload, needed to locate where this entry starts.
    payload_size = len(spect.tobytes())
    kaldi_io.write_mat(feat_ark, spect, key=utt_id)

    # Step back from the current end of the archive over the payload and a
    # fixed 15 bytes (presumably the binary matrix header emitted by
    # kaldi_io.write_mat -- TODO confirm against the kaldi_io source).
    entry_offset = feat_ark.tell() - payload_size - 15
    scp_line = str(utt_id) + ' ' + str(feat_ark.name) + ':' + str(entry_offset) + '\n'

    feat_scp.write(scp_line)
    utt2dur.write('%s %.6f\n' % (str(utt_id), seconds))
    utt2num_frames.write('%s %d\n' % (str(utt_id), len(spect)))
def MakeFeatsProcess(out_dir, proid, t_queue, e_queue):
    """Worker loop: drain ``t_queue`` and write spectrogram features for each task.

    Every task is a whitespace-separated string whose first field is the
    utterance key and whose second field is the wav path. For each task the
    spectrogram is appended to ``feat.<proid>.ark`` and one line is appended
    to each of ``feat.<proid>.scp``, ``utt2dur.<proid>`` and
    ``utt2num_frames.<proid>`` (all created inside ``out_dir``).

    Args:
        out_dir: Existing directory that receives the per-process output files.
        proid: Integer process index used in the output file names.
        t_queue: Queue of task strings; must provide ``empty()``, ``get()``
            and ``qsize()``.
        e_queue: Queue that collects the keys of utterances that failed.
    """
    scp_path = os.path.join(out_dir, 'feat.%d.scp' % proid)
    ark_path = os.path.join(out_dir, 'feat.%d.ark' % proid)
    dur_path = os.path.join(out_dir, 'utt2dur.%d' % proid)
    frames_path = os.path.join(out_dir, 'utt2num_frames.%d' % proid)

    # Context managers guarantee the handles are closed even if the loop aborts.
    with open(scp_path, 'w') as feat_scp, \
            open(ark_path, 'wb') as feat_ark, \
            open(dur_path, 'w') as utt2dur, \
            open(frames_path, 'w') as utt2num_frames:

        while not t_queue.empty():
            wav = t_queue.get()

            pair = wav.split()
            try:
                feat, duration = Make_Spect(wav_path=pair[1],
                                            windowsize=0.02,
                                            stride=0.01,
                                            duration=True)

                len_vec = len(feat.tobytes())
                key = pair[0]
                # The matrix is written without a key; entries are located
                # only through the offsets recorded in the scp file.
                kaldi_io.write_mat(feat_ark, feat, key='')

                # Offset of this entry: end of archive minus the payload and a
                # fixed 15 bytes (presumably the binary matrix header written
                # by kaldi_io.write_mat -- TODO confirm).
                feat_scp.write(
                    str(key) + ' ' + str(feat_ark.name) + ':' +
                    str(feat_ark.tell() - len_vec - 15) + '\n')
                # One record per line; without the newline all records would
                # be concatenated into a single unreadable line.
                utt2dur.write('%s %.6f\n' % (str(key), duration))
                utt2num_frames.write('%s %d\n' % (str(key), len(feat)))

            except Exception:
                # Best effort: report the failed utterance and keep going.
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                print("Error: %s" % pair[0])
                e_queue.put(pair[0])

            print('==> Process %s: %s left' % (str(proid), str(t_queue.qsize())))

    print('>> Process {} finished!'.format(proid))
Ejemplo n.º 3
0
def compute_wav_path(wav, feat_scp, feat_path, utt2dur, utt2num_frames):
    """Extract a spectrogram for one utterance and store it as a ``.npy`` file.

    Args:
        wav: Indexable pair; ``wav[0]`` is the utterance key (also used as the
            file stem) and ``wav[1]`` is passed to ``Make_Spect`` as ``wav_path``.
        feat_scp: Writable text handle receiving one ``key npy_path`` line.
        feat_path: Directory in which ``<key>.npy`` is saved.
        utt2dur: Writable text handle receiving one ``key duration`` line.
        utt2num_frames: Writable text handle receiving one ``key frame_count`` line.
    """
    spect, seconds = Make_Spect(wav_path=wav[1], windowsize=0.02, stride=0.01, duration=True)

    utt_id = wav[0]
    npy_file = os.path.join(feat_path, wav[0] + '.npy')
    np.save(npy_file, spect)

    # Index line first, then the two per-utterance statistics.
    scp_line = str(utt_id) + ' ' + npy_file + '\n'
    feat_scp.write(scp_line)
    utt2dur.write('%s %.6f\n' % (str(utt_id), seconds))
    utt2num_frames.write('%s %d\n' % (str(utt_id), len(spect)))
Ejemplo n.º 4
0
def MakeFeatsProcess(out_dir, proid, t_queue, e_queue):
    """Worker loop: drain ``t_queue`` and save one ``.npy`` spectrogram per task.

    Every task is a whitespace-separated string whose first field is the
    utterance key and whose second field is the wav path. Features are saved
    to ``<out_dir>/feats.<proid>/<key>.npy``; one line per utterance is
    appended to ``feat.<proid>.scp``, ``utt2dur.<proid>`` and
    ``utt2num_frames.<proid>`` inside ``out_dir``.

    Args:
        out_dir: Existing directory that receives the per-process outputs.
        proid: Integer process index used in the output names.
        t_queue: Queue of task strings; must provide ``empty()``, ``get()``
            and ``qsize()``.
        e_queue: Queue that collects the keys of utterances that failed;
            must provide ``put()`` and ``qsize()``.
    """
    scp_path = os.path.join(out_dir, 'feat.%d.scp' % proid)
    feat_path = os.path.join(out_dir, 'feats.%d' % proid)
    # exist_ok avoids the check-then-create race between worker processes.
    os.makedirs(feat_path, exist_ok=True)

    dur_path = os.path.join(out_dir, 'utt2dur.%d' % proid)
    frames_path = os.path.join(out_dir, 'utt2num_frames.%d' % proid)

    # Context managers guarantee the handles are closed even if the loop aborts.
    with open(scp_path, 'w') as feat_scp, \
            open(dur_path, 'w') as utt2dur, \
            open(frames_path, 'w') as utt2num_frames:

        while not t_queue.empty():
            # Read from the queue that was passed in (the same one the loop
            # condition inspects), not from a module-level global.
            comm = t_queue.get()
            pair = comm.split()
            key = pair[0]
            try:
                feat, duration = Make_Spect(wav_path=pair[1],
                                            windowsize=0.02,
                                            stride=0.01,
                                            duration=True)

                save_path = os.path.join(feat_path, key + '.npy')
                np.save(save_path, feat)

                feat_scp.write(str(key) + ' ' + save_path + '\n')
                utt2dur.write('%s %.6f\n' % (str(key), duration))
                utt2num_frames.write('%s %d\n' % (str(key), len(feat)))
            except Exception:
                # Best effort: record the failed utterance and keep going.
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                e_queue.put(key)

            # Progress line, refreshed whenever the remaining count is a multiple of 100.
            if t_queue.qsize() % 100 == 0:
                print(
                    '\rProcess [%3s] There are [%6s] utterances left, with [%6s] errors.'
                    % (str(proid), str(t_queue.qsize()), str(e_queue.qsize())),
                    end='')
Ejemplo n.º 5
0
                        # command = command.rstrip(' - ') + " " + temp_wav
                        # print(command)
                        spid, stdout, error = RunCommand(command)
                        # os.waitpid(spid, 0)

                        # with open(temp_wav, 'wb') as wav_f:
                        #     wav_f.write(stdout)
>>>>>>> Server/Server
                        if args.feat_type == 'fbank':
                            feat, duration = Make_Fbank(filename=temp_wav, filtertype=args.filter_type, use_energy=True,
                                                        lowfreq=args.lowfreq, log_scale=args.log_scale,
                                                        nfft=args.nfft, nfilt=args.filters, normalize=args.normalize,
                                                        duration=True, windowsize=args.windowsize,
                                                        multi_weight=args.multi_weight)
                        elif args.feat_type == 'spectrogram':
                            feat, duration = Make_Spect(wav_path=temp_wav, windowsize=args.windowsize,
<<<<<<< HEAD
                                                        lowfreq=args.lowfreq, stride=args.stride, duration=True,
                                                        nfft=args.nfft, normalize=args.normalize)
                        elif args.feat_type == 'mfcc':
                            feat, duration = Make_MFCC(filename=temp_wav, numcep=args.numcep, nfilt=args.filters,
                                                       lowfreq=args.lowfreq, normalize=args.normalize, duration=True,
                                                       use_energy=True)
=======
                                                        bandpass=args.bandpass, lowfreq=args.lowfreq,
                                                        highfreq=args.highfreq,
                                                        log_scale=args.log_scale,
                                                        stride=args.stride, duration=True, nfft=args.nfft,
                                                        normalize=args.normalize)
                        elif args.feat_type == 'mfcc':
                            feat, duration = Make_MFCC(filename=temp_wav, numcep=args.numcep, nfilt=args.filters,
def MakeFeatsProcess(lock, out_dir, ark_dir, ark_prefix, proid, t_queue,
                     e_queue):
    """Worker loop: pull tasks under ``lock`` and write acoustic features.

    Every task is a whitespace-separated string. The first field is the
    utterance key. With exactly two fields the second one is a wav path; with
    more fields the remainder is a shell command (an optional trailing ``|``
    is stripped) whose stdout is written to a temporary audio file that is
    then used as the input.

    Outputs, depending on the module-level ``args``:
      * ``args.feat_format == 'kaldi'``: matrices are appended to
        ``<out_dir>/<ark_prefix>_feat.<proid>.ark`` and indexed by offset.
      * ``args.feat_format == 'npy'``: one ``<key>.npy`` per utterance in
        ``<ark_dir>/<ark_prefix>``.
    In both cases ``feat.<proid>.temp.scp``, ``utt2dur.<proid>`` and
    ``utt2num_frames.<proid>`` are written to ``out_dir``. At the end the
    temporary scp is either compressed with ``copy-feats`` (kaldi format with
    ``args.compress`` set) or copied to ``feat.<proid>.scp``.

    Args:
        lock: Lock (usable as a context manager) guarding the
            ``empty()``/``get()`` pair on ``t_queue``.
        out_dir: Existing directory for the scp/utt2dur/utt2num_frames files.
        ark_dir: Parent directory of the feature directory.
        ark_prefix: Name of the feature sub-directory and ark file prefix.
        proid: Integer process index used in the output names.
        t_queue: Queue of task strings.
        e_queue: Queue that collects the keys of utterances that failed.
    """
    from contextlib import ExitStack

    feat_scp = os.path.join(out_dir, 'feat.%d.temp.scp' % proid)
    utt2dur = os.path.join(out_dir, 'utt2dur.%d' % proid)
    utt2num_frames = os.path.join(out_dir, 'utt2num_frames.%d' % proid)

    use_kaldi = args.feat_format == 'kaldi'
    feat_ark = None
    if use_kaldi:
        feat_ark = os.path.join(out_dir,
                                '%s_feat.%d.ark' % (ark_prefix, proid))

    feat_dir = os.path.join(ark_dir, ark_prefix)
    temp_dir = out_dir + '/temp'

    # ExitStack closes every handle opened so far, even when a later step fails.
    with ExitStack() as stack:
        feat_scp_f = stack.enter_context(open(feat_scp, 'w'))
        utt2dur_f = stack.enter_context(open(utt2dur, 'w'))
        feat_ark_f = None
        if use_kaldi:
            feat_ark_f = stack.enter_context(open(feat_ark, 'wb'))
        utt2num_frames_f = stack.enter_context(open(utt2num_frames, 'w'))

        # exist_ok avoids the check-then-create race between worker processes.
        os.makedirs(feat_dir, exist_ok=True)
        os.makedirs(temp_dir, exist_ok=True)

        while True:
            # Hold the lock across empty()+get() so no other worker can take
            # the last item in between; the context manager releases the lock
            # on every exit path, including the break.
            with lock:
                if t_queue.empty():
                    break
                # Read from the queue that was passed in, not a global.
                comm = t_queue.get()

            pair = comm.split()
            key = pair[0]
            try:
                if len(pair) > 2:
                    # The task carries a command that produces the audio.
                    command = ' '.join(pair[1:])
                    if command.endswith('|'):
                        command = command.rstrip('|')
                    _, stdout, _ = RunCommand(command)

                    temp_wav = temp_dir + '/%s.%s' % (key, args.data_format)
                    with open(temp_wav, 'wb') as wav_f:
                        wav_f.write(stdout)
                    try:
                        feat, duration = _extract_feat(temp_wav)
                    finally:
                        # Remove the temp file even when extraction fails.
                        os.remove(temp_wav)
                else:
                    feat, duration = _extract_feat(pair[1])

                feat = feat.astype(np.float32)
                if use_kaldi:
                    # Written without a key; entries are found via scp offsets.
                    kaldi_io.write_mat(feat_ark_f, feat, key='')
                    # End of archive minus payload and a fixed 15 bytes
                    # (presumably the binary matrix header written by
                    # kaldi_io.write_mat -- TODO confirm).
                    offsets = feat_ark + ':' + str(feat_ark_f.tell() -
                                                   len(feat.tobytes()) - 15)
                    feat_scp_f.write(key + ' ' + offsets + '\n')
                elif args.feat_format == 'npy':
                    npy_path = os.path.join(feat_dir, '%s.npy' % key)
                    np.save(npy_path, feat)
                    feat_scp_f.write(key + ' ' + npy_path + '\n')

                utt2dur_f.write('%s %.6f\n' % (key, duration))
                utt2num_frames_f.write('%s %d\n' % (key, len(feat)))
            except Exception as e:
                # Best effort: report the failed utterance and keep going.
                print(e)
                e_queue.put(key)

            print('\rProcess [%6s] There are [%6s] utterances' \
                  ' left, with [%6s] errors.' % (str(os.getpid()), str(t_queue.qsize()), str(e_queue.qsize())),
                  end='')

    # All handles are closed here, so the temporary scp/ark can be post-processed.
    new_feat_scp = os.path.join(out_dir, 'feat.%d.scp' % proid)
    if use_kaldi and args.compress:
        new_feat_ark = os.path.join(feat_dir, 'feat.%d.ark' % proid)
        compress_command = "copy-feats --compress=true scp:{} ark,scp:{},{}".format(
            feat_scp, new_feat_ark, new_feat_scp)

        RunCommand(compress_command)
        # Drop the uncompressed archive only once both compressed outputs exist.
        if os.path.exists(new_feat_scp) and os.path.exists(new_feat_ark):
            os.remove(feat_ark)
    else:
        shutil.copy(feat_scp, new_feat_scp)


def _extract_feat(wav_path):
    """Return ``(feat, duration)`` for *wav_path* using the extractor named by ``args.feat_type``."""
    if args.feat_type == 'fbank':
        return Make_Fbank(filename=wav_path,
                          filtertype=args.filter_type,
                          use_energy=True,
                          lowfreq=args.lowfreq,
                          nfft=args.nfft,
                          nfilt=args.filters,
                          normalize=args.normalize,
                          duration=True,
                          windowsize=args.windowsize,
                          multi_weight=args.multi_weight)
    if args.feat_type == 'spectrogram':
        return Make_Spect(wav_path=wav_path,
                          windowsize=args.windowsize,
                          lowfreq=args.lowfreq,
                          stride=args.stride,
                          duration=True,
                          nfft=args.nfft,
                          normalize=args.normalize)
    if args.feat_type == 'mfcc':
        return Make_MFCC(filename=wav_path,
                         numcep=args.numcep,
                         nfilt=args.filters,
                         lowfreq=args.lowfreq,
                         normalize=args.normalize,
                         duration=True,
                         use_energy=True)
    # Raised inside the caller's try block, so the utterance is reported
    # through e_queue instead of failing on an undefined variable.
    raise ValueError('Unsupported feat_type: %s' % args.feat_type)
Ejemplo n.º 7
0
@Time: 2020/11/29 20:31
@Overview:
"""
import matplotlib.pyplot as plt
from Process_Data.Compute_Feat.compute_vad import ComputeVadEnergy
from Process_Data.audio_processing import Make_Spect
import numpy as np

# Input recordings (8 kHz according to the file names).
# v1 = 'Data/dataset/voxceleb1/8k_radio_v3/id10001/1zcIwhmdeo4/00001.wav'
v1 = 'Data/dataset/aishell-2/data/C0001/IC0001W0001-8k.wav'
a1 = 'Data/dataset/wav_test/01-yangxiaokang/tmp_0001-U000013_15s_8k.wav'
r1 = 'Data/dataset/wav_test/00-yangwenhao/tmp_0009-U000000_15s_8k.wav'

# All three files are analysed with identical spectrogram settings.
_spect_kwargs = dict(windowsize=0.02, stride=0.01, nfft=320, normalize=False)
v1_spect, a1_spect, r1_spect = (Make_Spect(wav_file, **_spect_kwargs)
                                for wav_file in (v1, a1, r1))

# Per-row log of the summed exp(spect), kept as a column vector
# (assumes Make_Spect returns a 2-D numpy array -- TODO confirm).
v1_energy = np.log(np.sum(np.exp(v1_spect), axis=1)).reshape(-1, 1)
a1_energy = np.log(np.sum(np.exp(a1_spect), axis=1)).reshape(-1, 1)