Python get_syllable_features_directoryの例、template_speech_rec.get_train_data.get_syllable_features_directory Pythonの例

コード例 #1

0

ファイルを表示

ファイル: main.py プロジェクト: markstoehr/Template-Speech-Recognition

    num_window_step_samples=80,
    fft_length=512,
    kernel_length=7,
    freq_cutoff=3000,
    use_mel=True)

# Edge-map extraction settings handed to the feature extractor below.
ep = gtrd.EdgemapParameters(
    block_length=40,
    spread_length=1,
    threshold=0.7,
)

# Training utterances and the file indices discovered under that directory.
utterances_path = '/home/mark/Template-Speech-Recognition/Data/Train/'
file_indices = gtrd.get_data_files_indices(utterances_path)

# Collect every example of the two-phone syllable ('aa', 'r'), together with
# the average background returned because return_avg_bgd=True.
syllable = ('aa', 'r')
syllable_features, avg_bgd = gtrd.get_syllable_features_directory(
    utterances_path,
    file_indices,
    syllable,
    S_config=sp,
    E_config=ep,
    offset=0,
    E_verbose=False,
    return_avg_bgd=True,
)

np.save('data/aar_bgd_mel.npy', avg_bgd.E)

# example_mat is reused by every recover_* call below.
example_mat = gtrd.recover_example_map(syllable_features)

# Waveforms.  The arrays are passed positionally, so the archive stores them
# under numpy's default arr_0, arr_1, arr_2 keys.
lengths, waveforms = gtrd.recover_waveforms(syllable_features, example_mat)
np.savez('data/aar_waveforms_lengths.npz',
         waveforms, lengths, example_mat)

# Spectrograms, saved the same way.
Slengths, Ss = gtrd.recover_specs(syllable_features, example_mat)
np.savez('data/aar_Ss_mel_lengths.npz',
         Ss, Slengths, example_mat)

# Edge maps.  Note that this rebinds `lengths` from the waveform step above.
lengths, Es = gtrd.recover_edgemaps(syllable_features, example_mat)

コード例 #2

0

ファイルを表示

ファイル: all_phn_detection.py プロジェクト: markstoehr/Template-Speech-Recognition

                                    num_mix_params,
                                    phn_mapping=leehon_mapping,
                                    waveform_offset=15)

# Distinct target phone labels after applying the reduction mapping.
use_phns = list(set(leehon_mapping.values()))

# Process every phone from position 12 of that list onwards; phn_idx counts
# from 0 within the slice, not within use_phns.
for phn_idx, phn in enumerate(use_phns[12:]):
    print("%s %s" % (phn_idx, phn))
    phn_tuple = (phn,)
    print(phn)

    # Per-phone settings.
    waveform_offset = 15
    phn_mapping = leehon_mapping
    chunk_length = 1000

    phn_features, avg_bgd = gtrd.get_syllable_features_directory(
        utterances_path,
        file_indices,
        phn_tuple,
        S_config=sp,
        E_config=ep,
        offset=0,
        E_verbose=False,
        return_avg_bgd=True,
        waveform_offset=waveform_offset,
        phn_mapping=phn_mapping,
    )

    # Keep the background strictly inside (0, 1) before saving and reusing it.
    bgd = np.clip(avg_bgd.E, .01, .99)
    np.save('data/bgd.npy', bgd)

    example_mat = gtrd.recover_example_map(phn_features)

    # Waveforms.
    lengths, waveforms = gtrd.recover_waveforms(phn_features, example_mat)
    np.savez(
        'data/waveforms_lengths.npz',
        waveforms=waveforms,
        lengths=lengths,
        example_mat=example_mat,
    )

    # Spectrograms, stored as float32.
    Slengths, Ss = gtrd.recover_specs(phn_features, example_mat)
    Ss = Ss.astype(np.float32)
    np.savez(
        'data/Ss_lengths.npz',
        Ss=Ss,
        Slengths=Slengths,
        example_mat=example_mat,
    )

    # Edge maps computed against the clipped background, stored as uint8.
    Elengths, Es = gtrd.recover_edgemaps(phn_features, example_mat, bgd=bgd)
    Es = Es.astype(np.uint8)
    np.savez(
        'data/Es_lengths.npz',
        Es=Es,
        Elengths=Elengths,
        example_mat=example_mat,
    )

コード例 #3

0

ファイルを表示

ファイル: phn_detection.py プロジェクト: markstoehr/Template-Speech-Recognition

def perform_phn_template_estimation(phn,utterances_path,
                                    file_indices,sp,ep,
                                    num_mix_params,
                                    phn_mapping=None,
                                    waveform_offset=15,
                                    chunk_length=1000):
    """Extract features for one phone and estimate templates for it.

    The examples of ``phn`` are collected with
    ``gtrd.get_syllable_features_directory``; the clipped background,
    waveforms, spectrograms and edge maps are written under ``data/``.
    Then, for every entry of ``num_mix_params``, templates are estimated and
    saved, and one line of component statistics is appended to
    ``data/mixture_estimation_stats_<phn>.data`` in the format
    ``num_components total c0 c1 ... ck``.

    Parameters
    ----------
    phn : str
        Phone label; used as a one-element tuple for extraction and as a
        suffix in output file names.
    utterances_path, file_indices, sp, ep :
        Forwarded to ``gtrd.get_syllable_features_directory`` (``sp`` as
        ``S_config``, ``ep`` as ``E_config``).
    num_mix_params : iterable of int
        Numbers of mixture components to estimate.  ``1`` uses the plain
        mean of the examples; any other value runs Bernoulli-mixture EM.
    phn_mapping : optional
        Forwarded to the feature extractor.
    waveform_offset : int
        Forwarded to the feature extractor.
    chunk_length : int
        Maximum number of examples given to a single EM pass.

    Returns
    -------
    None.  All results are written to files under ``data/``.
    """
    phn_tuple = (phn,)
    print(phn)
    # Forward the caller's waveform_offset instead of a hard-coded 15.
    phn_features, avg_bgd = gtrd.get_syllable_features_directory(
        utterances_path, file_indices, phn_tuple,
        S_config=sp, E_config=ep, offset=0,
        E_verbose=False, return_avg_bgd=True,
        waveform_offset=waveform_offset,
        phn_mapping=phn_mapping)
    bgd = np.clip(avg_bgd.E, .01, .99)
    np.save('data/bgd.npy', bgd)
    example_mat = gtrd.recover_example_map(phn_features)
    lengths, waveforms = gtrd.recover_waveforms(phn_features, example_mat)
    np.savez('data/waveforms_lengths.npz', waveforms=waveforms,
             lengths=lengths,
             example_mat=example_mat)
    Slengths, Ss = gtrd.recover_specs(phn_features, example_mat)
    Ss = Ss.astype(np.float32)
    np.savez('data/Ss_lengths.npz', Ss=Ss, Slengths=Slengths,
             example_mat=example_mat)
    Elengths, Es = gtrd.recover_edgemaps(phn_features, example_mat, bgd=bgd)
    Es = Es.astype(np.uint8)
    np.savez('data/Es_lengths.npz', Es=Es, Elengths=Elengths,
             example_mat=example_mat)
    # the Es are padded from recover_edgemaps
    num_examples = len(Es)
    # The context manager closes the stats file even if estimation raises.
    with open('data/mixture_estimation_stats_%s.data' % phn, 'w') as f:
        for num_mix in num_mix_params:
            print(num_mix)
            if num_mix == 1:
                affinities = np.ones((Es.shape[0], 1), dtype=np.float64)
                mean_length = int(np.mean(Elengths) + .5)
                templates = (np.mean(Es, 0)[:mean_length],)
                spec_templates = (np.mean(Ss, 0)[:mean_length],)
                np.save('data/%d_affinities.npy' % (num_mix),
                        affinities)
                np.save('data/%d_templates.npy' % (num_mix),
                        templates)
                np.save('data/%d_spec_templates.npy' % (num_mix),
                        spec_templates)
                np.save('data/%d_templates_%s.npy' % (num_mix, phn),
                        templates)
                np.save('data/%d_spec_templates_%s.npy' % (num_mix, phn),
                        spec_templates)
                #
                # write the data to the mixture file for checking purposes
                # format is:
                #   num_components total c0 c1 c2 ... ck
                f.write('%d %d %g\n' % (num_mix,
                                        len(affinities),
                                        np.sum(affinities[:, 0])))
            else:
                # [start_idx, end_idx) always names the examples the most
                # recent EM pass saw, so it is defined even when the data
                # fits in one chunk (previously a NameError).
                start_idx = 0
                end_idx = min(chunk_length, num_examples)
                bem = bm.BernoulliMixture(num_mix, Es[start_idx:end_idx])
                bem.run_EM(.000001)
                # Continue EM chunk by chunk.  A short final chunk is slid
                # back so that every pass sees exactly chunk_length examples;
                # the old loop bound never reached that tail.
                for chunk_start in range(chunk_length, num_examples,
                                         chunk_length):
                    end_idx = min(chunk_start + chunk_length, num_examples)
                    start_idx = end_idx - chunk_length
                    bem.data_mat = Es[start_idx:end_idx].reshape(
                        chunk_length, bem.data_length)
                    bem.run_EM(.000001)

                # Affinities of this mixture, not the stale num_mix == 1 ones.
                affinities = bem.affinities
                templates = et.recover_different_length_templates(
                    affinities,
                    Es[start_idx:end_idx],
                    Elengths[start_idx:end_idx])
                spec_templates = et.recover_different_length_templates(
                    affinities,
                    Ss[start_idx:end_idx],
                    Slengths[start_idx:end_idx])
                np.save('data/%d_affinities.npy' % (num_mix),
                        affinities)
                np.savez('data/%d_templates.npz' % (num_mix),
                         *templates)
                np.savez('data/%d_spec_templates.npz' % (num_mix),
                         *spec_templates)
                np.savez('data/%d_templates_%s.npz' % (num_mix, phn),
                         *templates)
                np.savez('data/%d_spec_templates_%s.npz' % (num_mix, phn),
                         *spec_templates)
                component_totals = ' '.join(
                    str(np.sum(affinities[:, i]))
                    for i in range(affinities.shape[1]))
                f.write('%d %d ' % (num_mix, len(affinities))
                        + component_totals
                        + '\n')

コード例 #4

0

ファイルを表示

ファイル: parts.py プロジェクト: markstoehr/Template-Speech-Recognition

def save_syllable_features_to_data_dir(
    args,
    phn_tuple,
    utterances_path,
    file_indices,
    sp,
    ep,
    phn_mapping,
    tag_data_with_syllable_string=False,
    save_tag="train",
    waveform_offset=10,
    block_features=False,
):
    """
    Extract all examples of ``phn_tuple`` and save them under ``data/``.

    The examples are collected with ``gtrd.get_syllable_features_directory``.
    The clipped background is written to ``data/bgd.npy``; waveforms,
    spectrograms (float32) and edge maps (uint8) are each written to an
    ``.npz`` archive whose name ends in ``save_tag``.  When
    ``tag_data_with_syllable_string`` is true the archive names are also
    prefixed with the phones of ``phn_tuple`` joined by underscores.

    ``sp`` and ``ep`` are forwarded as ``S_config`` and ``E_config``;
    ``utterances_path``, ``file_indices`` and ``phn_mapping`` are forwarded
    unchanged.  ``args`` is read for ``doBlockFeatures`` and
    ``bernsteinEdgeThreshold``.

    NOTE(review): ``waveform_offset`` and ``block_features`` are not read in
    the portion of the body visible here -- confirm against the full file.
    """
    print "Collecting the data for phn_tuple " + " ".join("%s" % k for k in phn_tuple)
    syllable_string = "_".join(p for p in phn_tuple)

    # NOTE(review): the literal 15 is passed as waveform_offset, while this
    # function's own waveform_offset parameter defaults to 10 -- confirm
    # which value is intended.
    phn_features, avg_bgd = gtrd.get_syllable_features_directory(
        utterances_path,
        file_indices,
        phn_tuple,
        S_config=sp,
        E_config=ep,
        offset=0,
        E_verbose=False,
        return_avg_bgd=True,
        waveform_offset=15,
        phn_mapping=phn_mapping,
    )
    # Clip the average background to [0.01, 0.4] before saving and reusing it.
    bgd = np.clip(avg_bgd.E, 0.01, 0.4)
    np.save("data/bgd.npy", bgd)
    example_mat = gtrd.recover_example_map(phn_features)
    # Waveforms.
    lengths, waveforms = gtrd.recover_waveforms(phn_features, example_mat)
    if tag_data_with_syllable_string:
        np.savez(
            "data/%s_waveforms_lengths_%s.npz" % (syllable_string, save_tag),
            waveforms=waveforms,
            lengths=lengths,
            example_mat=example_mat,
        )
    else:
        np.savez(
            "data/waveforms_lengths_%s.npz" % save_tag, waveforms=waveforms, lengths=lengths, example_mat=example_mat
        )
    # Spectrograms, stored as float32.
    Slengths, Ss = gtrd.recover_specs(phn_features, example_mat)
    Ss = Ss.astype(np.float32)
    if tag_data_with_syllable_string:
        np.savez(
            "data/%s_Ss_lengths_%s.npz" % (syllable_string, save_tag), Ss=Ss, Slengths=Slengths, example_mat=example_mat
        )
    else:
        np.savez("data/Ss_lengths_%s.npz" % (save_tag), Ss=Ss, Slengths=Slengths, example_mat=example_mat)
    # Edge maps computed against the clipped background, stored as uint8.
    Elengths, Es = gtrd.recover_edgemaps(phn_features, example_mat, bgd=bgd)
    Es = Es.astype(np.uint8)
    if tag_data_with_syllable_string:
        np.savez(
            "data/%s_Es_lengths_%s.npz" % (syllable_string, save_tag), Es=Es, Elengths=Elengths, example_mat=example_mat
        )
    else:
        np.savez("data/Es_lengths_%s.npz" % (save_tag), Es=Es, Elengths=Elengths, example_mat=example_mat)

    # NOTE(review): this branch is gated on args.doBlockFeatures, not on the
    # block_features parameter.  E, logParts and logInvParts are not bound in
    # the visible part of this function (only Es is) -- presumably globals or
    # a typo for Es; verify.
    if args.doBlockFeatures:
        out = code_parts.code_parts(E.astype(np.uint8), logParts, logInvParts, args.bernsteinEdgeThreshold)
        max_responses = np.argmax(out, -1)
        Bs = code_parts.spread_patches(max_responses, 2, 2, out.shape[-1] - 1)