def output_detection_scores(s_fname,phns_fname,flts_fname, scores_by_syllable,model_dict_list):
    """
    Load one saved utterance (waveform, phone labels, and flt data) and
    compute its spectrogram features for detection scoring.

    Parameters:
    ===========
    s_fname:
        path to a .npy file holding the raw waveform samples
    phns_fname:
        path to a .npy file holding the phone labels for the utterance
    flts_fname:
        path to a .npy file associated with the utterance
        (presumably frame label/timing data -- TODO confirm)
    scores_by_syllable:
        NOTE(review): accepted but not used in the visible body
    model_dict_list: hashtable, values are lists of models, keys are syllables
        the lists of models are actually given by the number of
        mixture components

    Returns:
    =========
    outputs_by_syllable: hashtable, keys are syllables, values are also hash tables
        keys to the hash are the number of components in the mixture model,
        the values to this hash table are lists of detection scores
        over all the utterances

    NOTE(review): the visible body computes the spectrogram S but never
    returns anything -- this function appears to be truncated; confirm
    against the full source.
    """
    s = np.load(s_fname)
    phns = np.load(phns_fname)
    flts = np.load(flts_fname)
    # spectrogram parameters (sample_rate, num_window_samples, ...) are
    # module-level globals defined outside this view -- verify they are set
    # before this function is called
    S = esp.get_spectrogram_features(s,
                                     sample_rate,
                                     num_window_samples,
                                     num_window_step_samples,
                                     fft_length,
                                     freq_cutoff,
                                     kernel_length)
def compute_edge_features(x,config_d):
    """
    Turn a raw 16-bit integer waveform into thresholded edge-map features.

    The waveform is scaled to the unit range, converted to a spectrogram
    using the settings in config_d['SPECTROGRAM'], mapped to unthresholded
    edge features, thresholded/spread in place using config_d['EDGES'],
    and finally reorganized for fast filtering.

    Parameters:
    ===========
    x:
        raw waveform samples on the 16-bit PCM integer scale
    config_d:
        nested configuration dict with 'SPECTROGRAM' and 'EDGES' sections

    Returns:
    =========
    np.uint8 array of reorganized, thresholded edge features
    """
    spec_conf = config_d['SPECTROGRAM']
    edge_conf = config_d['EDGES']
    # normalize 16-bit integer samples into [-1, 1] before analysis
    waveform = x.astype(float)/(2**15-1)
    spectrogram, sample_mapping, sample_to_frames = esp.get_spectrogram_features(
        waveform,
        spec_conf['sample_rate'],
        spec_conf['num_window_samples'],
        spec_conf['num_window_step_samples'],
        spec_conf['fft_length'],
        spec_conf['freq_cutoff'],
        spec_conf['kernel_length'],
        preemph=spec_conf['preemphasis'],
        no_use_dpss=spec_conf['no_use_dpss'],
        do_freq_smoothing=spec_conf['do_freq_smoothing'],
        return_sample_mapping=True
        )
    # note the transpose: edges are extracted on the time-major layout
    edge_map, row_breaks, orientations = esp._edge_map_no_threshold(
        spectrogram.T)
    # thresholding and spreading mutate edge_map in place
    esp._edge_map_threshold_segments(edge_map,
                                     edge_conf['block_length'],
                                     edge_conf['spread_length'],
                                     threshold=edge_conf['threshold'],
                                     edge_orientations = orientations,
                                     edge_feature_row_breaks = row_breaks,
                                     abst_threshold=edge_conf['abst_threshold'],
                                     verbose=False)
    return reorg_part_for_fast_filtering(edge_map).astype(np.uint8)
def get_waliji_feature_map(s,
                           log_part_blocks,
                           log_invpart_blocks,
                           abst_threshold=np.array([.025,.025,.015,.015,
                                                     .02,.02,.02,.02]),
                           spread_length=3,
                           fft_length=512,
                           num_window_step_samples=80,
                           freq_cutoff=3000,
                           sample_rate=16000,
                           num_window_samples=320,
                           kernel_length=7):
    """
    Compute the waliji part-based feature map for a raw signal.

    Input is usually just the signal s as the rest of the parameters
    are not going to change very often.

    Parameters:
    ===========
    s: np.ndarray[ndim=1]
        Raw signal data that we are extracting feature from
    log_part_blocks: np.ndarray[ndim=4,dtype=np.float32]
        First dimension is over the different features
    log_invpart_blocks: np.ndarray[ndim=4,dtype=np.float32]
        Essentially the same array as log_part_blocks. Related
        by its equal to np.log(1-np.exp(log_part_blocks))

    NOTE(review): abst_threshold and spread_length are accepted but never
    used by the visible body -- the thresholding call below hard-codes
    block length 40, spread 1, and threshold .7. Confirm whether these
    parameters were meant to be wired through.
    """
    spec = esp.get_spectrogram_features(s,
                                        sample_rate,
                                        num_window_samples,
                                        num_window_step_samples,
                                        fft_length,
                                        freq_cutoff,
                                        kernel_length,
                                        )
    edges, row_breaks, orientations = esp._edge_map_no_threshold(spec)
    # in-place thresholding/spreading of the edge map
    esp._edge_map_threshold_segments(edges,
                                     40,
                                     1,
                                     threshold=.7,
                                     edge_orientations = orientations,
                                     edge_feature_row_breaks = row_breaks)
    reorganized = reorg_part_for_fast_filtering(edges)
    coded = cp.code_parts_fast(reorganized.astype(np.uint8),
                               log_part_blocks,
                               log_invpart_blocks,10)
    best_parts = np.argmax(coded,2)
    # the amount of spreading to do is governed by the size of the part features
    spread = swp.spread_waliji_patches(best_parts,
                                       log_part_blocks.shape[1],
                                       log_part_blocks.shape[2],
                                       log_part_blocks.shape[0])
    return collapse_to_grid(spread,log_part_blocks.shape[1],
                            log_part_blocks.shape[2])
offset=3
# Sweep over candidate lower frequency cutoffs, caching spectrograms and
# edge maps for the first 400 utterances to tmp_data_path.
# NOTE(review): this is Python 2 code (print statements); tmp_data_path,
# s_fnames, and the spectrogram globals are defined outside this view.
for lower_cutoff in [10,30,40,50,60]:
    print "lower_cutoff = %d" %lower_cutoff
    # accumulators for this cutoff
    # NOTE(review): bps, spec_windows, and s_idx_list are initialized but
    # never used in the visible span -- the chunk is likely truncated.
    bps = np.zeros((0,40,5),dtype=np.uint8)
    spec_windows = np.zeros((0,6,6))
    # guide file records which utterance index maps to which source file
    # NOTE(review): f is never closed in the visible span -- confirm a
    # close() follows, or wrap in a with-block in the full source.
    f = open(tmp_data_path+'guide_to_tmp_data_%d.txt' %lower_cutoff,'w')
    s_idx_list = []
    for s_idx, s_fname in enumerate(s_fnames[:400]):
        print "s_idx = %d" %s_idx
        f.write(str(s_idx)+'\t'+s_fname+'\n')
        s = np.load(s_fname)
        S = esp.get_spectrogram_features(s,
                                         sample_rate,
                                         num_window_samples,
                                         num_window_step_samples,
                                         fft_length,
                                         freq_cutoff,
                                         kernel_length)
        # cache the spectrogram only on the first cutoff pass, since S does
        # not depend on lower_cutoff
        if lower_cutoff == 10:
            np.save(tmp_data_path+str(s_idx)+'S.npy',S)
        E, edge_feature_row_breaks,\
            edge_orientations = esp._edge_map_no_threshold(S)
        # in-place thresholding of the edge map (block 20, spread 1)
        esp._edge_map_threshold_segments(E,
                                         20,
                                         1,
                                         threshold=.7,
                                         edge_orientations = edge_orientations,
                                         edge_feature_row_breaks = edge_feature_row_breaks)
        # likewise the thresholded edge map is cached once
        if lower_cutoff == 10:
            np.save(tmp_data_path+str(s_idx)+'E.npy',E)
def main(args):
    """
    For each label and component constructed a positive and negative
    training set and train a linear SVM to separate them.

    Pipeline (as visible here): extract edge-feature patches from the wav
    files listed in args.fls_txt, fit a Bernoulli mixture model to the
    binary patches, save the mixture means that have sufficient support,
    cluster the underlying spectrogram patches, and optionally render the
    spectrogram clusters to args.viz_spec_parts.

    Fixes relative to the previous revision:
    - removed an unconditional `import pdb; pdb.set_trace()` that halted
      every run before the spectrogram clustering step;
    - removed bare `except:` handlers that dropped into the debugger
      (errors now propagate normally);
    - nrows computation now uses float division: under Python 2,
      `args.n_components/ncols` floor-divides first, making np.ceil a
      no-op and producing a plot grid too small to hold every component;
    - deleted a block of commented-out code.
    """
    config_d = configParserWrapper.load_settings(open(args.config,'r'))
    # accumulators below are kept for compatibility with the surrounding
    # code; not all are consumed in the visible portion of this function
    true_examples = []
    false_examples = []
    mean = 0
    total = 0
    num_less_than_eq = np.zeros(20)
    fls = np.loadtxt(args.fls_txt, dtype=str)
    all_X_patches = []
    all_S_patches = []
    htemp, dhtemp, ddhtemp, tttemp = fb.hermite_window(
        args.winsize,
        args.num_tapers,
        args.win_half_time_support)
    # spectrogram transform: fft length is the next power of two above the
    # window length; 16 kHz sample rate, 80-sample hop, 4 kHz cutoff
    run_transform = lambda x, winlength : esp.get_spectrogram_features(x,
                                 16000,
                                 winlength,
                                 80,
                                 2**(int(np.ceil(np.log2(winlength)))),
                                 4000,
                                 7,
                                 )
    X_patches = []
    S_patches = []
    for fl_id, fl_path in enumerate(fls):
        # cap the training set size
        if len(X_patches) > 100000: break
        S = run_transform(wavfile.read(fl_path)[1], args.winsize)
        if args.do_exp_weighted_divergence:
            Sold = S.copy()
            S *=np.exp(S)
        X = get_edge_features_use_config(S.T,config_d['EDGES'])
        cur_X_patches, cur_S_patches = get_maximal_patches(X,S,patch_radius=2)
        X_patches.extend(cur_X_patches)
        S_patches.extend(cur_S_patches)

    num_new_patches = len(X_patches)
    X = np.array(X_patches)
    S = np.array(S_patches)
    data_shape = X.shape[1:]
    # flatten each patch to a vector for the mixture model
    X = X.reshape(X.shape[0],np.prod(data_shape))
    bmm = bernoullimm.BernoulliMM(n_components=args.n_components,
                                  n_init= 50,
                                  n_iter= 500,
                                  random_state=0,
                                  verbose=args.v, tol=1e-6)
    bmm.fit(X)
    # keep only components with expected support above 30 patches
    use_means = bmm.predict_proba(X).sum(0) > 30
    print(use_means.sum())
    np.save(args.save_parts,
            bmm.means_.reshape(*( (bmm.n_components,)+data_shape))[use_means])
    # cluster the raw spectrogram patches under the fitted assignments
    S_shape = S.shape[1:]
    S_clusters = bmm.cluster_underlying_data(
        S.reshape(len(S),np.prod(S_shape)),X).reshape(
        *( (bmm.n_components,) + S_shape))[use_means]
    np.save(args.spec_save_parts,S_clusters)

    # near-square visualization grid; float division so the ceiling takes
    # effect under Python 2 and nrows*ncols >= n_components
    ncols = int(np.sqrt(args.n_components))
    nrows = int(np.ceil(float(args.n_components)/ncols))
    if args.viz_spec_parts is not None:
        plt.close('all')
        fig = plt.figure(1, (6, 6))
        grid = ImageGrid(fig, 111, # similar to subplot(111)
                         nrows_ncols = (nrows,ncols ),
                         axes_pad=0.001, # pad between axes in inch.
                         )
        for i in range(S_clusters.shape[0]):
            grid[i].imshow(S_clusters[i],cmap=cm.binary,interpolation='nearest')
            grid[i].spines['bottom'].set_color('red')
            grid[i].spines['top'].set_color('red')
            grid[i].spines['left'].set_color('red')
            grid[i].spines['right'].set_color('red')
            for a in grid[i].axis.values():
                a.toggle(all=False)
        # style the unused trailing cells of the grid the same way
        for i in range(S_clusters.shape[0],nrows*ncols):
            grid[i].spines['bottom'].set_color('red')
            grid[i].spines['top'].set_color('red')
            grid[i].spines['left'].set_color('red')
            grid[i].spines['right'].set_color('red')
            for a in grid[i].axis.values():
                a.toggle(all=False)
        plt.savefig('%s' % args.viz_spec_parts ,bbox_inches='tight')