def get_modspec(args, srate=16000, window=np.hanning):
    wavs = args.scp
    add_reverb = args.add_reverb
    set_unity_gain = args.set_unity_gain
    nmodulations = args.nmodulations
    order = args.order
    fduration = args.fduration_modspec
    frate = args.frate
    nfilters = args.nfilters

    fbank = createFbank(nfilters, int(2 * fduration * srate), srate)

    if add_reverb:
        if add_reverb == 'small_room':
            sr_r, rir = read('./RIR/RIR_SmallRoom1_near_AnglA.wav')
            rir = rir[:, 1]
            rir = rir / np.power(2, 15)
        elif add_reverb == 'large_room':
            sr_r, rir = read('./RIR/RIR_LargeRoom1_far_AnglA.wav')
            rir = rir[:, 1]
            rir = rir / np.power(2, 15)
        elif add_reverb == 'clean':
            print('%s: No reverberation added!' % sys.argv[0])
        else:
            raise ValueError('Invalid type of reverberation!')

    with open(wavs, 'r') as fid:
        all_feats = {}
        for line in fid:
            tokens = line.strip().split()
            uttid, inwav = tokens[0], ' '.join(tokens[1:])

            if inwav[-1] == '|':
                proc = subprocess.run(inwav[:-1], shell=True,
                                      stdout=subprocess.PIPE)
                sr, signal = read(io.BytesIO(proc.stdout))
            else:
                sr, signal = read(inwav)
            assert sr == srate, 'Input file has different sampling rate.'

            # I want to work with numbers from 0 to 1 so....
            signal = signal / np.power(2, 15)

            if add_reverb:
                if not add_reverb == 'clean':
                    signal = addReverb(signal, rir)

            time_frames = np.array([
                frame for frame in getFrames(signal, srate, frate, fduration,
                                             window)
            ])
            cos_trans = freqAnalysis.dct(time_frames) / np.sqrt(
                2 * int(srate * fduration))

            [frame_num, ndct] = np.shape(cos_trans)

            if set_unity_gain:
                feats = np.zeros((frame_num, nfilters * (nmodulations - 1)))
            else:
                feats = np.zeros((frame_num, nfilters * nmodulations))

            print('%s: Computing Features for file: %s' % (sys.argv[0], uttid))
            sys.stdout.flush()

            for i in range(frame_num):
                if set_unity_gain:
                    each_feat = np.zeros([nfilters, nmodulations - 1])
                else:
                    each_feat = np.zeros([nfilters, nmodulations])
                for j in range(nfilters):
                    filt = fbank[j, 0:-1]
                    band_dct = filt * cos_trans[i, :]
                    xlpc, gg = computeLpcFast(band_dct, order)  # Compute LPC coefficients
                    if set_unity_gain:
                        gg = 1
                    mod_spec = computeModSpecFromLpc(gg, xlpc, nmodulations)
                    if set_unity_gain:
                        mod_spec = mod_spec[1:]
                    each_feat[j, :] = mod_spec
                if set_unity_gain:
                    each_feat = np.reshape(each_feat,
                                           (1, nfilters * (nmodulations - 1)))
                else:
                    each_feat = np.reshape(each_feat,
                                           (1, nfilters * nmodulations))
                feats[i, :] = each_feat

            all_feats[uttid] = feats

    return all_feats
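# Illustrative usage sketch (not part of the original module): get_modspec()
# reads its configuration from an argparse-style namespace whose attribute
# names match the ones unpacked above. The scp path and parameter values here
# are hypothetical.
def _example_get_modspec():
    from argparse import Namespace
    args = Namespace(scp='data/train/wav.scp',   # hypothetical Kaldi-style scp
                     add_reverb='clean',
                     set_unity_gain=True,
                     nmodulations=12,
                     order=50,
                     fduration_modspec=0.5,
                     frate=100,
                     nfilters=15)
    # Returns a dict mapping uttid -> (num_frames, feature_dim) array
    return get_modspec(args)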
def getFeats(args, srate=16000, window=np.hanning):
    wavs = args.scp
    scp_type = args.scp_type
    outfile = args.outfile
    add_reverb = args.add_reverb
    coeff_0 = args.coeff_0
    coeff_n = args.coeff_n
    order = args.order
    fduration = args.fduration
    frate = args.frate
    nfilters = args.nfilters
    kaldi_cmd = args.kaldi_cmd

    # Set up mel-filterbank
    fbank_type = args.fbank_type.strip().split(',')

    if args.complex_modulation:
        dur = int(fduration * srate)
    else:
        dur = int(2 * fduration * srate)

    if fbank_type[0] == "mel":
        if len(fbank_type) < 2:
            raise ValueError('Mel filter bank not configured properly....')
        fbank = createFbank(nfilters, dur, srate,
                            warp_fact=float(fbank_type[1]))
    elif fbank_type[0] == "cochlear":
        if len(fbank_type) < 6:
            raise ValueError(
                'Cochlear filter bank not configured properly....')
        if int(fbank_type[3]) == 1:
            print('%s: Alpha is fixed and will not change as a function of '
                  'the center frequency...' % sys.argv[0])
        fbank = createFbankCochlear(nfilters, dur, srate,
                                    om_w=float(fbank_type[1]),
                                    alp=float(fbank_type[2]),
                                    fixed=int(fbank_type[3]),
                                    bet=float(fbank_type[4]),
                                    warp_fact=float(fbank_type[5]))
    else:
        raise ValueError(
            'Invalid type of filter bank, use mel or cochlear with proper '
            'configuration')

    coeff_num = coeff_n - coeff_0 + 1
    if args.keep_even:
        temp = np.arange(0, coeff_num)
        if coeff_0 % 2 == 0:
            # The range starts from an odd coefficient
            feat_len = temp[1::2].shape[0]
        else:
            feat_len = temp[0::2].shape[0]
    elif args.complex_modulation:
        if args.absolute_value:
            feat_len = coeff_num
        else:
            # Real and imaginary parts are stacked, doubling the length
            feat_len = 2 * coeff_num
    else:
        feat_len = coeff_num

    if args.compensate_noise:
        if args.complex_modulation:
            fmax = coeff_num / fduration
            faxis = np.linspace(0, fmax, coeff_n)
        else:
            fmax = coeff_num / (2 * fduration)
            faxis = np.linspace(0, fmax, coeff_n)

    if args.no_window:
        print('%s: Using square windows' % sys.argv[0])
        window = sq_wind

    if add_reverb:
        if add_reverb == 'small_room':
            sr_r, rir = read('./RIR/RIR_SmallRoom1_near_AnglA.wav')
            rir = rir[:, 1]
            rir = rir / np.power(2, 15)
        elif add_reverb == 'large_room':
            sr_r, rir = read('./RIR/RIR_LargeRoom1_far_AnglA.wav')
            rir = rir[:, 1]
            rir = rir / np.power(2, 15)
        elif add_reverb == 'clean':
            print('%s: No reverberation added!' % sys.argv[0])
        else:
            raise ValueError('Invalid type of reverberation!')

    with open(wavs, 'r') as fid:
        all_feats = {}
        for line in fid:
            tokens = line.strip().split()
            uttid, inwav = tokens[0], ' '.join(tokens[1:])

            if scp_type == 'wav':
                if inwav[-1] == '|':
                    try:
                        proc = subprocess.run(inwav[:-1], shell=True,
                                              stdout=subprocess.PIPE)
                        sr, signal = read(io.BytesIO(proc.stdout))
                        skip_rest = False
                    except Exception:
                        skip_rest = True
                else:
                    try:
                        sr, signal = read(inwav)
                        skip_rest = False
                    except Exception:
                        skip_rest = True
                if not skip_rest:
                    assert sr == srate, \
                        'Input file has different sampling rate.'
            elif scp_type == 'segment':
                try:
                    cmd = 'wav-copy ' + inwav + ' - '
                    proc = subprocess.run(cmd, shell=True,
                                          stdout=subprocess.PIPE)
                    sr, signal = read(io.BytesIO(proc.stdout))
                    skip_rest = False
                except Exception:
                    skip_rest = True
            else:
                raise ValueError(
                    'Invalid type of scp type, it should be either wav or '
                    'segment')

            if not skip_rest:
                # Samples are assumed to be already normalized; uncomment to
                # rescale 16-bit integer input to [-1, 1]
                # signal = signal / np.power(2, 15)

                if add_reverb:
                    if not add_reverb == 'clean':
                        signal = addReverb(signal, rir)

                time_frames = np.array([
                    frame for frame in getFrames(signal, srate, frate,
                                                 fduration, window)
                ])

                if args.complex_modulation:
                    cos_trans = freqAnalysis.ifft(time_frames)
                    cos_trans = cos_trans[:, :int(fduration * srate / 2)]
                else:
                    cos_trans = freqAnalysis.dct(time_frames) / np.sqrt(
                        2 * int(srate * fduration))

                [frame_num, ndct] = np.shape(cos_trans)

                feats = np.zeros((frame_num, nfilters * feat_len))
                print('%s: Computing Features for file: %s (%d frames)' %
                      (sys.argv[0], uttid, time_frames.shape[0]))
                sys.stdout.flush()

                for i in range(frame_num):
                    each_feat = np.zeros([nfilters, feat_len])
                    for j in range(nfilters):
                        filt = fbank[j, 0:-1]
                        band_dct = filt * cos_trans[i, :]
                        if args.complex_modulation:
                            # Compute LPC coefficients on the complex signal
                            xlpc, gg = computeLpcFast(band_dct, order,
                                                      keepreal=False)
                            mod_spec = computeModSpecFromLpc(gg, xlpc, coeff_n)
                            if args.compensate_noise:
                                mod_spec = mod_spec * faxis
                            if args.absolute_value:
                                temp2 = np.abs(mod_spec[coeff_0 - 1:coeff_n])
                            else:
                                temp2 = np.append(
                                    np.real(mod_spec[coeff_0 - 1:coeff_n]),
                                    np.imag(mod_spec[coeff_0 - 1:coeff_n]))
                        else:
                            # Compute LPC coefficients
                            xlpc, gg = computeLpcFast(band_dct, order)
                            mod_spec = np.real(
                                computeModSpecFromLpc(gg, xlpc, coeff_n))
                            if args.compensate_noise:
                                mod_spec = mod_spec * faxis
                            if args.absolute_value:
                                temp2 = np.abs(mod_spec[coeff_0 - 1:coeff_n])
                            else:
                                temp2 = mod_spec[coeff_0 - 1:coeff_n]

                        if args.keep_even:
                            if coeff_0 % 2 == 0:
                                each_feat[j, :] = temp2[1::2]
                            else:
                                each_feat[j, :] = temp2[0::2]
                        else:
                            each_feat[j, :] = temp2

                    each_feat = np.reshape(each_feat,
                                           (1, nfilters * feat_len))
                    feats[i, :] = each_feat

                all_feats[uttid] = feats

    dict2Ark(all_feats, outfile, kaldi_cmd)
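# The --no-window branch above swaps the analysis window for sq_wind, which is
# defined elsewhere in this package. A minimal stand-in, assuming it follows
# the same calling convention as np.hanning, would simply be a rectangular
# (all-ones) window:
def _sq_wind_sketch(n):
    # Rectangular window of length n (assumed behaviour of sq_wind)
    return np.ones(n)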
def getFeats(args, srate=16000, window=np.hanning):
    wavs = args.scp
    segment = args.segment
    outfile = args.outfile
    add_reverb = args.add_reverb
    set_unity_gain = args.set_unity_gain
    nmodulations = args.nmodulations
    order = args.order
    fduration = args.fduration
    frate = args.frate
    nfilters = args.nfilters
    kaldi_cmd = args.kaldi_cmd

    fbank = createFbank(nfilters, int(2 * fduration * srate), srate)

    if add_reverb:
        if add_reverb == 'small_room':
            sr_r, rir = read('./RIR/RIR_SmallRoom1_near_AnglA.wav')
            rir = rir[:, 1]
            rir = rir / np.power(2, 15)
        elif add_reverb == 'large_room':
            sr_r, rir = read('./RIR/RIR_LargeRoom1_far_AnglA.wav')
            rir = rir[:, 1]
            rir = rir / np.power(2, 15)
        elif add_reverb == 'clean':
            print('%s: No reverberation added!' % sys.argv[0])
        else:
            raise ValueError('Invalid type of reverberation!')

    wav_in_buffer = ''  # Wav file that is currently loaded in RAM

    # Load locations and ids of all wav files
    wav_ids = []
    wav_locs = []
    with open(wavs, 'r') as fid:
        for line in fid:
            tokens = line.strip().split()
            uttid, inwav = tokens[0], ' '.join(tokens[1:])
            wav_ids.append(uttid)
            wav_locs.append(inwav)

    # Compute features for all the segments
    with open(segment, 'r') as fid_s:
        all_feats = {}
        for line_s in fid_s:
            token_s = line_s.strip().split()
            seg_id = token_s[0]
            wav_id = token_s[1]

            # Load the wav file only if it is not already in RAM
            if wav_in_buffer != wav_id:
                wav_in_buffer = wav_id
                inwav = wav_locs[wav_ids.index(wav_id)]
                if inwav[-1] == '|':
                    proc = subprocess.run(inwav[:-1], shell=True,
                                          stdout=subprocess.PIPE)
                    sr, signal_big = read(io.BytesIO(proc.stdout))
                else:
                    sr, signal_big = read(inwav)
                assert sr == srate, 'Input file has different sampling rate.'

            # Cut out the segment and scale the samples to [-1, 1]
            t_beg = int(float(token_s[2]) * sr)
            t_end = int(float(token_s[3]) * sr)
            signal = signal_big[t_beg:t_end]
            signal = signal / np.power(2, 15)

            if add_reverb:
                if not add_reverb == 'clean':
                    signal = addReverb(signal, rir)

            time_frames = np.array([
                frame for frame in getFrames(signal, srate, frate, fduration,
                                             window)
            ])
            cos_trans = freqAnalysis.dct(time_frames) / np.sqrt(
                2 * int(srate * fduration))

            [frame_num, ndct] = np.shape(cos_trans)
            feats = np.zeros((frame_num, nfilters * nmodulations))

            print('%s: Computing Features for file: %s and segment: %s' %
                  (sys.argv[0], wav_id, seg_id))
            sys.stdout.flush()

            for i in range(frame_num):
                each_feat = np.zeros([nfilters, nmodulations])
                for j in range(nfilters):
                    filt = fbank[j, 0:-1]
                    band_dct = filt * cos_trans[i, :]
                    # Compute LPC coefficients
                    xlpc, gg = computeLpcFast(band_dct, order)
                    if set_unity_gain:
                        gg = 1
                    mod_spec = computeModSpecFromLpc(gg, xlpc, nmodulations)
                    each_feat[j, :] = mod_spec
                each_feat = np.reshape(each_feat, (1, nfilters * nmodulations))
                feats[i, :] = each_feat

            all_feats[seg_id] = feats

    dict2Ark(all_feats, outfile, kaldi_cmd)
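# The segments file parsed above appears to follow Kaldi's convention of one
# segment per line, "<segment-id> <wav-id> <start-seconds> <end-seconds>",
# with the times converted to sample indices using the wav's sampling rate.
# Hypothetical example lines:
#
#   utt1-0001 utt1 0.00 2.35
#   utt1-0002 utt1 2.35 4.10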
def extractModSpecFeatures(wavs, outdir, phone_map, phn_file_dir,
                           get_phone_labels=True, only_center=True,
                           around_center=1, ignore_edge=False,
                           nmodulations=12, order=50, fduration=0.5,
                           frate=100, nfft=512, nfilters=15, srate=16000,
                           window=np.hanning):
    '''Extract the Modulation Spectral Features.

    Args:
        wavs (list): List of (uttid, 'filename or pipe-command').
        outdir (string): Output path inside an existing directory.
        phone_map (string): Map of the phonemes from Kaldi.
        phn_file_dir (string): Directory containing the .PHN alignment files.
        get_phone_labels (bool): Set True to also emit the phoneme labels.
        only_center (bool): Compute features only around phoneme centers.
        around_center (int): Number of frames taken around each center.
        ignore_edge (bool): Drop the first and last phoneme of each file.
        nmodulations (int): Number of modulation coefficients per filter.
        order (int): LPC model order.
        fduration (float): Frame duration in seconds.
        frate (int): Frame rate in Hertz.
        nfft (int): Number of points to compute the FFT.
        nfilters (int): Number of filters.
        srate (int): Expected sampling rate of the audio.
        window (function): Windowing function.

    Note:
        It is possible to read the audio Kaldi-style through a
        "pipe-command", e.g.: "sph2pipe -f wav /path/file.wav |"

    '''
    if not only_center:
        fbank = createFbank(nfilters, int(2 * fduration * srate), srate)

        # Get list of phonemes
        phn_list = []
        with open(phone_map, 'r') as fid2:
            for line2 in fid2:
                line2 = line2.strip().split()
                if len(line2) == 2:
                    if 'sil' not in line2 and 'SIL' not in line2:
                        phn_list.append(line2[1])
        phn_list = list(set(phn_list))
        phn_list.sort()

        with open(wavs, 'r') as fid:
            # Initialize matrix for all features
            if get_phone_labels:
                all_feats = np.empty(nmodulations * nfilters + 1)
            else:
                all_feats = np.empty(nmodulations * nfilters)

            for line in fid:
                tokens = line.strip().split()
                uttid, inwav = tokens[0], ' '.join(tokens[1:])
                fname_phn = uttid + '.PHN'

                if get_phone_labels:
                    # Read the first line of the phone file up front
                    phn_file = open(fname_phn)
                    phn_line = phn_file.readline()
                    phn_locs = phn_line.strip().split()

                    # Get phoneme information
                    phone_now = phn_locs[2]
                    phone_end = int(int(phn_locs[1]) / 160)
                    beg_frame = int(int(phn_locs[0]) / 160)

                if inwav[-1] == '|':
                    proc = subprocess.run(inwav[:-1], shell=True,
                                          stdout=subprocess.PIPE)
                    sr, signal = read(io.BytesIO(proc.stdout))
                else:
                    sr, signal = read(inwav)
                assert sr == srate, 'Input file has different sampling rate.'

                # Scale the 16-bit integer samples to the [-1, 1] range
                signal = signal / np.power(2, 15)

                time_frames = np.array([
                    frame for frame in getFrames(signal, srate, frate,
                                                 fduration, window)
                ])
                cos_trans = freqAnalysis.dct(time_frames) / np.sqrt(
                    2 * int(srate * fduration))

                [frame_num, ndct] = np.shape(cos_trans)

                if get_phone_labels:
                    feats = np.zeros([frame_num, nmodulations * nfilters + 1])
                else:
                    feats = np.zeros([frame_num, nmodulations * nfilters])

                print('Computing Features for file: %s' % uttid)

                for i in range(beg_frame, frame_num):
                    each_feat = np.zeros([nfilters, nmodulations])
                    for j in range(nfilters):
                        filt = fbank[j, 0:-1]
                        band_dct = filt * cos_trans[i, :]
                        # Compute LPC coefficients
                        xlpc, gg = computeLpcFast(band_dct, order)
                        mod_spec = computeModSpecFromLpc(gg, xlpc,
                                                         nmodulations)
                        each_feat[j, :] = mod_spec
                    each_feat = np.reshape(each_feat,
                                           (1, nfilters * nmodulations))

                    if get_phone_labels:
                        # Update the current phoneme
                        if i > phone_end:
                            # Get new phone label
                            phn_line = phn_file.readline()
                            if phn_line:
                                phn_locs = phn_line.strip().split()
                                phone_now = phn_locs[2]
                                phone_end = int(int(phn_locs[1]) / 160)
                            else:
                                break  # Break if no more phones are remaining
                        ind = phn_list.index(phone_now)
                        each_feat = np.append(each_feat, ind)

                    feats[i, :] = each_feat

                all_feats = np.vstack([all_feats, feats])

        all_feats = all_feats[1:, :]

        # Save the final BIG feature file
        np.save(os.path.join(outdir), all_feats)
        np.save(os.path.join(os.path.dirname(outdir), 'phone_list'), phn_list)

    else:
        fbank = createFbank(nfilters, int(2 * fduration * srate), srate)

        # Get list of phonemes
        phn_list = []
        with open(phone_map, 'r') as fid2:
            for line2 in fid2:
                line2 = line2.strip().split()
                if len(line2) == 2:
                    if 'sil' not in line2 and 'SIL' not in line2:
                        phn_list.append(line2[1])
        phn_list = list(set(phn_list))
        phn_list.sort()

        with open(wavs, 'r') as fid:
            # Initialize matrix for all features
            if get_phone_labels:
                all_feats = np.empty(nmodulations * nfilters + 1)
            else:
                all_feats = np.empty(nmodulations * nfilters)

            for line in fid:
                tokens = line.strip().split()
                uttid, inwav = tokens[0], ' '.join(tokens[1:])

                if inwav[-1] == '|':
                    proc = subprocess.run(inwav[:-1], shell=True,
                                          stdout=subprocess.PIPE)
                    sr, signal = read(io.BytesIO(proc.stdout))
                else:
                    sr, signal = read(inwav)
                assert sr == srate, 'Input file has different sampling rate.'

                # Scale the 16-bit integer samples to the [-1, 1] range
                signal = signal / np.power(2, 15)
                fname_phn = uttid + '.PHN'

                # Get all phones and their centers
                if os.path.isfile(os.path.join(phn_file_dir, fname_phn)):
                    phn_file = open(os.path.join(phn_file_dir, fname_phn))
                    phone_mid = np.empty(0)
                    phone_now = np.empty(0)
                    for line2 in phn_file:
                        phn_locs = line2.strip().split()
                        if phn_locs[2] in phn_list:
                            ind = phn_list.index(phn_locs[2])
                            phone_now = np.append(phone_now, ind)
                            phone_mid = np.append(
                                phone_mid,
                                int(int(phn_locs[0]) + int(phn_locs[1])) / 2)

                    time_frames = np.array([
                        frame for frame in getFrames(signal, srate, frate,
                                                     fduration, window)
                    ])
                    cos_trans = freqAnalysis.dct(time_frames) / np.sqrt(
                        2 * int(srate * fduration))

                    [frame_num, ndct] = np.shape(cos_trans)

                    if ignore_edge:
                        phone_mid = phone_mid[1:-1]
                        phone_now = phone_now[1:-1]

                    only_compute = len(phone_mid)
                    if get_phone_labels:
                        feats = np.zeros([only_compute * around_center,
                                          nmodulations * nfilters + 1])
                    else:
                        feats = np.zeros([only_compute * around_center,
                                          nmodulations * nfilters])

                    print('Computing Features for file: %s' % uttid)

                    for kk in range(only_compute):
                        i_mid = int(np.floor(phone_mid[kk]))
                        for cont in range(around_center):
                            i = i_mid + cont - int((around_center - 1) / 2)
                            each_feat = np.zeros([nfilters, nmodulations])
                            for j in range(nfilters):
                                filt = fbank[j, 0:-1]
                                band_dct = filt * cos_trans[i, :]
                                # Compute LPC coefficients
                                xlpc, gg = computeLpcFast(band_dct, order)
                                mod_spec = computeModSpecFromLpc(
                                    gg, xlpc, nmodulations)
                                each_feat[j, :] = mod_spec
                            each_feat = np.reshape(
                                each_feat, (1, nfilters * nmodulations))
                            if get_phone_labels:
                                feats[around_center * kk + cont, :] = \
                                    np.append(each_feat, phone_now[kk])
                            else:
                                feats[around_center * kk + cont, :] = each_feat

                    all_feats = np.vstack([all_feats, feats])

        all_feats = all_feats[1:, :]

        # Save the final BIG feature file
        np.save(os.path.join(outdir), all_feats)
        np.save(os.path.join(os.path.dirname(outdir), 'phone_list'), phn_list)
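# The .PHN alignment files read above are expected to contain one phone per
# line in the TIMIT-style layout "<begin-sample> <end-sample> <phone>", for
# example (hypothetical values):
#
#   0 2190 sil
#   2190 3050 sh
#
# In the frame-synchronous branch the sample indices are converted to frame
# indices by dividing by 160, i.e. 10 ms frames at a 16 kHz sampling rate.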
def extractModSpecFeatures(args, srate=16000, window=np.hanning):
    '''Extract the Modulation Spectral Features.

    All options are read from the fields of ``args``:

        scp: List of (uttid, 'filename or pipe-command').
        outfile: Output file for the pickled feature dictionary.
        phn_file: Map of the phonemes from Kaldi.
        phn_file_dir: Directory containing the .PHN alignment files.
        get_phone_labels: Set True to also emit the phoneme labels.
        set_unity_gain: Fix the LPC gain to 1.
        nmodulations: Number of modulation coefficients per filter.
        order: LPC model order.
        fduration: Frame duration in seconds.
        frate: Frame rate in Hertz.
        nfilters: Number of filters.
        add_reverb: 'clean', 'small_room' or 'large_room'.

    Note:
        It is possible to read the audio Kaldi-style through a
        "pipe-command", e.g.: "sph2pipe -f wav /path/file.wav |"

    '''
    wavs = args.scp
    outfile = args.outfile
    phone_map = args.phn_file
    phn_file_dir = args.phn_file_dir
    get_phone_labels = args.get_phone_labels
    add_reverb = args.add_reverb
    set_unity_gain = args.set_unity_gain
    nmodulations = args.nmodulations
    order = args.order
    fduration = args.fduration
    frate = args.frate
    nfilters = args.nfilters

    fbank = createFbank(nfilters, int(2 * fduration * srate), srate)

    if add_reverb:
        if add_reverb == 'small_room':
            sr_r, rir = read('./RIR/RIR_SmallRoom1_near_AnglA.wav')
            rir = rir[:, 1]
            rir = rir / np.power(2, 15)
        elif add_reverb == 'large_room':
            sr_r, rir = read('./RIR/RIR_LargeRoom1_far_AnglA.wav')
            rir = rir[:, 1]
            rir = rir / np.power(2, 15)
        elif add_reverb == 'clean':
            print('%s: No reverberation added!' % sys.argv[0])
        else:
            raise ValueError('Invalid type of reverberation!')

    # Get list of phonemes
    phn_list = []
    with open(phone_map, 'r') as fid2:
        for line2 in fid2:
            line2 = line2.strip().split()
            if len(line2) == 2:
                if 'sil' not in line2 and 'SIL' not in line2:
                    phn_list.append(line2[1])
    phn_list = list(set(phn_list))
    phn_list.sort()

    with open(wavs, 'r') as fid:
        all_feats = {}
        for line in fid:
            tokens = line.strip().split()
            uttid, inwav = tokens[0], ' '.join(tokens[1:])

            if inwav[-1] == '|':
                proc = subprocess.run(inwav[:-1], shell=True,
                                      stdout=subprocess.PIPE)
                sr, signal = read(io.BytesIO(proc.stdout))
            else:
                sr, signal = read(inwav)
            assert sr == srate, 'Input file has different sampling rate.'

            # Scale the 16-bit integer samples to the [-1, 1] range
            signal = signal / np.power(2, 15)

            if add_reverb:
                if not add_reverb == 'clean':
                    signal = addReverb(signal, rir)

            fname_phn = uttid + '.PHN'

            # Get all phones and their centers
            if os.path.isfile(os.path.join(phn_file_dir, fname_phn)):
                phn_file = open(os.path.join(phn_file_dir, fname_phn))
                phone_mid = np.empty(0)
                phone_now = np.empty(0)
                for line2 in phn_file:
                    phn_locs = line2.strip().split()
                    if phn_locs[2] in phn_list:
                        ind = phn_list.index(phn_locs[2])
                        phone_now = np.append(phone_now, ind)
                        phone_mid = np.append(
                            phone_mid,
                            int(int(phn_locs[0]) + int(phn_locs[1])) / 2)

                time_frames = np.array([
                    frame for frame in getFrames(signal, srate, frate,
                                                 fduration, window)
                ])
                cos_trans = freqAnalysis.dct(time_frames) / np.sqrt(
                    2 * int(srate * fduration))

                [frame_num, ndct] = np.shape(cos_trans)

                only_compute = len(phone_mid)
                if get_phone_labels:
                    feats = np.zeros([only_compute,
                                      nmodulations * nfilters + 1])
                else:
                    feats = np.zeros([only_compute,
                                      nmodulations * nfilters])

                print('Computing Features for file: %s' % uttid)
                sys.stdout.flush()

                for kk in range(only_compute):
                    i = int(np.floor(phone_mid[kk]))
                    each_feat = np.zeros([nfilters, nmodulations])
                    for j in range(nfilters):
                        filt = fbank[j, 0:-1]
                        band_dct = filt * cos_trans[i, :]
                        # Compute LPC coefficients
                        xlpc, gg = computeLpcFast(band_dct, order)
                        if set_unity_gain:
                            gg = 1
                        mod_spec = computeModSpecFromLpc(gg, xlpc,
                                                         nmodulations)
                        each_feat[j, :] = mod_spec
                    each_feat = np.reshape(each_feat,
                                           (1, nfilters * nmodulations))
                    if get_phone_labels:
                        feats[kk, :] = np.append(each_feat, phone_now[kk])
                    else:
                        feats[kk, :] = each_feat

                all_feats[uttid] = feats

    # Save the final BIG feature file
    pickle.dump(all_feats, open(outfile, 'wb'))
    np.save(os.path.join(os.path.dirname(outfile), 'phone_list'), phn_list)
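# Illustrative sketch (not part of the original module) for reading back what
# extractModSpecFeatures() writes: a pickled dict of per-utterance feature
# matrices in `outfile`, plus phone_list.npy saved next to it.
def _load_extracted_feats(outfile):
    import os
    import pickle
    import numpy as np
    with open(outfile, 'rb') as fid:
        all_feats = pickle.load(fid)   # uttid -> (num_phones, feat_dim) array
    phn_list = np.load(os.path.join(os.path.dirname(outfile),
                                    'phone_list.npy'))
    return all_feats, phn_list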
def getFeats(args, srate=16000, window=np.hamming):
    wavs = args.scp
    scp_type = args.scp_type
    outfile = args.outfile
    coeff_num = args.coeff_num
    coeff_range = args.coeff_range
    order = args.order
    fduration = args.fduration
    frate = args.frate
    nfilters = args.nfilters
    kaldi_cmd = args.kaldi_cmd
    add_noise = args.add_noise
    add_reverb = args.add_reverb

    if args.lifter_config:
        fid = open(args.lifter_config, 'r')
        lifter_config = fid.readline().strip().split(',')
        lifter_config = np.asarray([float(x) for x in lifter_config])

    # Set up mel-filterbank
    fbank_type = args.fbank_type.strip().split(',')
    if fbank_type[0] == "mel":
        if len(fbank_type) < 2:
            raise ValueError('Mel filter bank not configured properly....')
        fbank = createFbank(nfilters, int(2 * fduration * srate), srate,
                            warp_fact=float(fbank_type[1]))
    elif fbank_type[0] == "cochlear":
        if len(fbank_type) < 6:
            raise ValueError(
                'Cochlear filter bank not configured properly....')
        if int(fbank_type[3]) == 1:
            print('%s: Alpha is fixed and will not change as a function of '
                  'the center frequency...' % sys.argv[0])
        fbank = createFbankCochlear(nfilters, int(2 * fduration * srate),
                                    srate,
                                    om_w=float(fbank_type[1]),
                                    alp=float(fbank_type[2]),
                                    fixed=int(fbank_type[3]),
                                    bet=float(fbank_type[4]),
                                    warp_fact=float(fbank_type[5]))
    else:
        raise ValueError(
            'Invalid type of filter bank, use mel or cochlear with proper '
            'configuration')

    # Ignore odd modulations
    if args.odd_mod_zero:
        print('%s: Ignoring odd modulations... ' % sys.argv[0])

    if add_noise:
        if add_noise == "clean" or add_noise == "diff":
            print('%s: No noise added!' % sys.argv[0])
        else:
            noise_info = add_noise.strip().split(',')
            noise = load_noise(noise_info[0])

    if add_reverb:
        if add_reverb == 'small_room':
            sr_r, rir = read('./RIR/RIR_SmallRoom1_near_AnglA.wav')
            rir = rir[:, 1]
            rir = rir / np.power(2, 15)
        elif add_reverb == 'medium_room':
            sr_r, rir = read('./RIR/RIR_MediumRoom1_far_AnglA.wav')
            rir = rir[:, 1]
            rir = rir / np.power(2, 15)
        elif add_reverb == 'large_room':
            sr_r, rir = read('./RIR/RIR_LargeRoom1_far_AnglA.wav')
            rir = rir[:, 1]
            rir = rir / np.power(2, 15)
        elif add_reverb == 'clean':
            print('%s: No reverberation added!' % sys.argv[0])
        else:
            raise ValueError('Invalid type of reverberation!')

    # Set up the modulation-coefficient mask
    coeff_range = coeff_range.split(',')
    lowpass = int(coeff_range[0])
    highpass = int(coeff_range[1])
    mask = []
    for i in range(coeff_num):
        if i >= lowpass and i <= highpass:
            mask.append(1)
        else:
            mask.append(0)
    mask = np.asarray(mask)

    # From here on, overlap_fraction holds the frame-shift fraction
    args.overlap_fraction = 1 - args.overlap_fraction

    # Set up modulation weights
    args.gamma_weight = args.gamma_weight.strip().split(',')
    if not args.gamma_weight[0] == "None":
        print('%s: Adding gamma filter on modulation frequencies...'
              % sys.argv[0])
        x = np.linspace(0, order - 1, order)
        scale = float(args.gamma_weight[0])
        shape = float(args.gamma_weight[1])
        pk_required = float(args.gamma_weight[2])

        res = 2 * fduration
        pk_required = pk_required * res
        pk = (shape - 1) * scale
        loc = -pk + pk_required
        mod_wts = stats.gamma.pdf(x, a=shape, loc=loc, scale=scale) * 3 * scale

    with open(wavs, 'r') as fid:
        all_feats = {}
        if args.write_utt2num_frames:
            all_lens = {}

        for line in fid:
            tokens = line.strip().split()
            uttid, inwav = tokens[0], ' '.join(tokens[1:])

            if scp_type == 'wav':
                if inwav[-1] == '|':
                    try:
                        proc = subprocess.run(inwav[:-1], shell=True,
                                              stdout=subprocess.PIPE)
                        sr, signal = read(io.BytesIO(proc.stdout))
                        skip_rest = False
                    except Exception:
                        skip_rest = True
                else:
                    try:
                        sr, signal = read(inwav)
                        skip_rest = False
                    except Exception:
                        skip_rest = True
                if not skip_rest:
                    assert sr == srate, \
                        'Input file has different sampling rate.'
            elif scp_type == 'segment':
                try:
                    cmd = 'wav-copy ' + inwav + ' - '
                    proc = subprocess.run(cmd, shell=True,
                                          stdout=subprocess.PIPE)
                    sr, signal = read(io.BytesIO(proc.stdout))
                    skip_rest = False
                except Exception:
                    skip_rest = True
            else:
                raise ValueError(
                    'Invalid type of scp type, it should be either wav or '
                    'segment')

            # Samples are assumed to be already normalized; uncomment to
            # rescale 16-bit integer input to [-1, 1]
            # signal = signal / np.power(2, 15)

            if not skip_rest:
                if add_noise:
                    if not add_noise == "clean":
                        if add_noise == "diff":
                            a = [1, 2, 3, 2, 0, -2, -5, -2, 0, 2, 3, 2, 1]
                            signal = convolve(signal, a, mode='same')
                        else:
                            signal = add_noise_to_wav(signal, noise,
                                                      float(noise_info[1]))

                if add_reverb:
                    if not add_reverb == 'clean':
                        signal = addReverb(signal, rir)

                tframes = signal.shape[0]  # Number of samples in the signal

                lfr = 1 / (args.overlap_fraction * fduration)
                time_frames = np.array([
                    frame for frame in getFrames(signal, srate, lfr,
                                                 fduration, window)
                ])
                cos_trans = freqAnalysis.dct(time_frames) / np.sqrt(
                    2 * int(srate * fduration))

                [frame_num, ndct] = np.shape(cos_trans)

                feats = np.zeros((nfilters,
                                  int(np.ceil(tframes * frate / srate))))
                ptr = int(0)

                print('%s: Computing Features for file: %s' %
                      (sys.argv[0], uttid))
                sys.stdout.flush()

                for i in range(0, frame_num):
                    for j in range(nfilters):
                        filt = fbank[j, 0:-1]
                        band_dct = filt * cos_trans[i, :]
                        # Compute LPC coefficients
                        xlpc, gg = computeLpcFast(band_dct, order)
                        ms = computeModSpecFromLpc(gg, xlpc, coeff_num)
                        ms = ms * mask
                        if args.lifter_config:
                            ms = ms * lifter_config
                        if not args.gamma_weight[0] == "None":
                            ms = ms * mod_wts
                        if args.odd_mod_zero:
                            ms[1::2] = 0

                        # Convert the modulation coefficients back to an
                        # envelope for this window
                        ms = fft(ms, 2 * int(fduration * frate))
                        ms = np.abs(np.exp(ms))

                        kk = int(np.round(fduration * frate))
                        kkb2 = int(np.round(fduration * frate / 2))
                        ms = ms[0:kk] * np.hanning(kk) / window(kk)

                        # Overlap-add the windowed envelopes
                        if i == 0:
                            if feats.shape[1] < kkb2:
                                feats[j, :] += ms[kkb2:kkb2 + feats.shape[1]]
                            else:
                                feats[j, ptr:ptr + kkb2] += ms[kkb2:]
                        elif i == frame_num - 1 or i == frame_num - 2:
                            if ms.shape[0] >= feats.shape[1] - ptr:
                                feats[j, ptr:] += ms[:feats.shape[1] - ptr]
                            else:
                                feats[j, ptr:ptr + kk] += ms
                        else:
                            feats[j, ptr:ptr + kk] += ms

                    kk = int(np.round(fduration * frate *
                                      args.overlap_fraction))
                    kkb2 = int(np.round(fduration * frate / 2))
                    if i == 0:
                        ptr = int(ptr + kk - kkb2)
                    else:
                        ptr = int(ptr + kk + randrange(2))

                all_feats[uttid] = np.log(
                    np.clip(feats.T, a_max=None, a_min=0.00000000000001))
                if args.write_utt2num_frames:
                    all_lens[uttid] = feats.shape[1]

    dict2Ark(all_feats, outfile, kaldi_cmd)

    if args.write_utt2num_frames:
        with open(outfile + '.len', 'w+') as file:
            for key, lens in all_lens.items():
                p = "{:s} {:d}".format(key, lens)
                file.write(p)
                file.write("\n")
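# The gamma weighting above is constructed so that the pdf's mode lands on
# the requested peak: for a gamma distribution the mode is
# loc + (shape - 1) * scale, and loc is chosen as pk_required - pk with
# pk = (shape - 1) * scale. A quick self-contained check with illustrative
# (hypothetical) parameter values:
def _check_gamma_peak(shape=2.0, scale=3.0, pk_required=10.0):
    import numpy as np
    from scipy import stats
    x = np.linspace(0, 49, 50)
    loc = pk_required - (shape - 1) * scale
    wts = stats.gamma.pdf(x, a=shape, loc=loc, scale=scale)
    return x[np.argmax(wts)]  # equals pk_required on this integer grid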
def extractModSpecFeatures(args, srate=16000, window=np.hamming):
    """Extract modulation spectral features and group the frames by phoneme."""
    wavs = args.scp
    outfile = args.outfile
    add_reverb = args.add_reverb
    set_unity_gain = args.set_unity_gain
    nmodulations = args.nmodulations
    order = args.order
    fduration = args.fduration
    frate = args.frate
    nfilters = args.nfilters
    phone_map = args.phn_file
    phn_file_dir = args.phn_file_dir
    get_phone_labels = args.get_phone_labels

    fbank = createFbank(nfilters, int(2 * fduration * srate), srate)

    if add_reverb:
        if add_reverb == 'small_room':
            sr_r, rir = read('./RIR/RIR_SmallRoom1_near_AnglA.wav')
            rir = rir[:, 1]
            rir = rir / np.power(2, 15)
        elif add_reverb == 'large_room':
            sr_r, rir = read('./RIR/RIR_LargeRoom1_far_AnglA.wav')
            rir = rir[:, 1]
            rir = rir / np.power(2, 15)
        elif add_reverb == 'clean':
            print('%s: No reverberation added!' % sys.argv[0])
        else:
            raise ValueError('Invalid type of reverberation!')
    sys.stdout.flush()

    # Get list of phonemes
    phn_list = []
    with open(phone_map, 'r') as fid2:
        for line2 in fid2:
            line2 = line2.strip().split()
            if len(line2) == 2:
                if 'sil' not in line2 and 'SIL' not in line2:
                    phn_list.append(line2[1])
    phn_list = list(set(phn_list))
    phn_list.sort()
    np.save('phone_list', phn_list)

    with open(wavs, 'r') as fid:
        all_feats = {}
        for line in fid:
            tokens = line.strip().split()
            uttid, inwav = tokens[0], ' '.join(tokens[1:])

            fname_phn = uttid + '.PHN'
            fname_phn_base = uttid[0:-2] + '.PHN'
            if isfile(join(phn_file_dir, fname_phn_base)):
                fname_phn = fname_phn_base

            # Get all the locations of phonemes
            phone_now = np.empty(0)
            phone_end = np.empty(0)
            phone_beg = np.empty(0)
            if isfile(join(phn_file_dir, fname_phn)):
                print('%s: Computing Features for file: %s' %
                      (sys.argv[0], uttid))
                sys.stdout.flush()
                with open(join(phn_file_dir, fname_phn)) as phn_file:
                    for phn_line in phn_file:
                        phn_locs = phn_line.strip().split()
                        # Get phoneme information
                        phone_now = np.append(phone_now, phn_locs[2])
                        phone_end = np.append(phone_end, phn_locs[1])
                        phone_beg = np.append(phone_beg, phn_locs[0])

            if np.size(phone_end) == 0:
                print('%s: Corrupted Phone file.. hence skipped...' %
                      sys.argv[0])
                continue

            if inwav[-1] == '|':
                proc = subprocess.run(inwav[:-1], shell=True,
                                      stdout=subprocess.PIPE)
                sr, signal = read(io.BytesIO(proc.stdout))
            else:
                sr, signal = read(inwav)
            assert sr == srate, 'Input file has different sampling rate.'

            # Scale the 16-bit integer samples to the [-1, 1] range
            signal = signal / np.power(2, 15)

            if add_reverb:
                if not add_reverb == 'clean':
                    signal = addReverb(signal, rir)

            time_frames = np.array([
                frame for frame in getFrames(signal, srate, frate, fduration,
                                             window)
            ])
            cos_trans = dct(time_frames) / np.sqrt(2 * int(srate * fduration))

            [frame_num, ndct] = np.shape(cos_trans)

            # Main feature computation loop
            feats = np.zeros((frame_num, nfilters * nmodulations))
            sys.stdout.flush()
            for i in range(frame_num):
                each_feat = np.zeros([nfilters, nmodulations])
                for j in range(nfilters):
                    filt = fbank[j, 0:-1]
                    band_dct = filt * cos_trans[i, :]
                    # Compute LPC coefficients
                    xlpc, gg = computeLpcFast(band_dct, order)
                    if set_unity_gain:
                        gg = 1
                    mod_spec = computeModSpecFromLpc(gg, xlpc, nmodulations)
                    each_feat[j, :] = mod_spec
                each_feat = np.reshape(each_feat, (1, nfilters * nmodulations))
                feats[i, :] = each_feat

            # Group the frames of each phoneme, optionally appending the
            # phone index as the last column
            if not get_phone_labels:
                now_feats = np.empty(nfilters * nmodulations)
            else:
                now_feats = np.empty(nfilters * nmodulations + 1)
            for num, phn in enumerate(phone_now):
                now_frames = feats[int(phone_beg[num]):int(phone_end[num]), :]
                if get_phone_labels:
                    ind = phn_list.index(phn)
                    fr_num = now_frames.shape[0]
                    now_frames = np.concatenate(
                        (now_frames, np.tile(ind, (fr_num, 1))), axis=1)
                now_feats = np.vstack([now_feats, now_frames])
            now_feats = now_feats[1:, :]

            all_feats[uttid] = now_feats

    pickle.dump(all_feats, open(outfile, 'wb'))
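# When get_phone_labels is set, each row stored above is the flattened
# nfilters * nmodulations feature vector with the phone index appended as the
# last column. A small illustrative helper (not in the original) to split the
# two back apart:
def _split_feats_and_labels(now_feats):
    feats = now_feats[:, :-1]
    labels = now_feats[:, -1].astype(int)  # indices into phn_list
    return feats, labels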