def output_detection_scores(s_fname,phns_fname,flts_fname, scores_by_syllable,model_dict_list):
    """
    Load one saved utterance (waveform, phone labels, and flt data) and
    compute its spectrogram features for detection scoring.

    Parameters:
    ===========
    s_fname:
        path to a .npy file holding the raw waveform samples
    phns_fname:
        path to a .npy file holding the phone labels for the utterance
    flts_fname:
        path to a .npy file associated with the utterance
        (presumably frame label/timing data -- TODO confirm)
    scores_by_syllable:
        NOTE(review): accepted but not used in the visible body
    model_dict_list: hashtable, values are lists of models, keys are syllables
        the lists of models are actually given by the number of
        mixture components

    Returns:
    =========
    outputs_by_syllable: hashtable, keys are syllables, values are also hash tables
        keys to the hash are the number of components in the mixture model,
        the values to this hash table are lists of detection scores
        over all the utterances

    NOTE(review): the visible body computes the spectrogram S but never
    returns anything -- this function appears to be truncated; confirm
    against the full source.
    """
    s = np.load(s_fname)
    phns = np.load(phns_fname)
    flts = np.load(flts_fname)
    # spectrogram parameters (sample_rate, num_window_samples, ...) are
    # module-level globals defined outside this view -- verify they are set
    # before this function is called
    S = esp.get_spectrogram_features(s,
                                     sample_rate,
                                     num_window_samples,
                                     num_window_step_samples,
                                     fft_length,
                                     freq_cutoff,
                                     kernel_length)
def compute_edge_features(x,config_d):
    """
    Turn a raw 16-bit integer waveform into thresholded edge-map features.

    The waveform is scaled to the unit range, converted to a spectrogram
    using the settings in config_d['SPECTROGRAM'], mapped to unthresholded
    edge features, thresholded/spread in place using config_d['EDGES'],
    and finally reorganized for fast filtering.

    Parameters:
    ===========
    x:
        raw waveform samples on the 16-bit PCM integer scale
    config_d:
        nested configuration dict with 'SPECTROGRAM' and 'EDGES' sections

    Returns:
    =========
    np.uint8 array of reorganized, thresholded edge features
    """
    spec_conf = config_d['SPECTROGRAM']
    edge_conf = config_d['EDGES']
    # normalize 16-bit integer samples into [-1, 1] before analysis
    waveform = x.astype(float)/(2**15-1)
    spectrogram, sample_mapping, sample_to_frames = esp.get_spectrogram_features(
        waveform,
        spec_conf['sample_rate'],
        spec_conf['num_window_samples'],
        spec_conf['num_window_step_samples'],
        spec_conf['fft_length'],
        spec_conf['freq_cutoff'],
        spec_conf['kernel_length'],
        preemph=spec_conf['preemphasis'],
        no_use_dpss=spec_conf['no_use_dpss'],
        do_freq_smoothing=spec_conf['do_freq_smoothing'],
        return_sample_mapping=True
        )
    # note the transpose: edges are extracted on the time-major layout
    edge_map, row_breaks, orientations = esp._edge_map_no_threshold(
        spectrogram.T)
    # thresholding and spreading mutate edge_map in place
    esp._edge_map_threshold_segments(edge_map,
                                     edge_conf['block_length'],
                                     edge_conf['spread_length'],
                                     threshold=edge_conf['threshold'],
                                     edge_orientations = orientations,
                                     edge_feature_row_breaks = row_breaks,
                                     abst_threshold=edge_conf['abst_threshold'],
                                     verbose=False)
    return reorg_part_for_fast_filtering(edge_map).astype(np.uint8)
def get_waliji_feature_map(s,
                           log_part_blocks,
                           log_invpart_blocks,
                           abst_threshold=np.array([.025,.025,.015,.015,
                                                     .02,.02,.02,.02]),
                           spread_length=3,
                           fft_length=512,
                           num_window_step_samples=80,
                           freq_cutoff=3000,
                           sample_rate=16000,
                           num_window_samples=320,
                           kernel_length=7):
    """
    Compute the waliji part-based feature map for a raw signal.

    Input is usually just the signal s as the rest of the parameters
    are not going to change very often.

    Parameters:
    ===========
    s: np.ndarray[ndim=1]
        Raw signal data that we are extracting feature from
    log_part_blocks: np.ndarray[ndim=4,dtype=np.float32]
        First dimension is over the different features
    log_invpart_blocks: np.ndarray[ndim=4,dtype=np.float32]
        Essentially the same array as log_part_blocks. Related
        by its equal to np.log(1-np.exp(log_part_blocks))

    NOTE(review): abst_threshold and spread_length are accepted but never
    used by the visible body -- the thresholding call below hard-codes
    block length 40, spread 1, and threshold .7. Confirm whether these
    parameters were meant to be wired through.
    """
    spec = esp.get_spectrogram_features(s,
                                        sample_rate,
                                        num_window_samples,
                                        num_window_step_samples,
                                        fft_length,
                                        freq_cutoff,
                                        kernel_length,
                                        )
    edges, row_breaks, orientations = esp._edge_map_no_threshold(spec)
    # in-place thresholding/spreading of the edge map
    esp._edge_map_threshold_segments(edges,
                                     40,
                                     1,
                                     threshold=.7,
                                     edge_orientations = orientations,
                                     edge_feature_row_breaks = row_breaks)
    reorganized = reorg_part_for_fast_filtering(edges)
    coded = cp.code_parts_fast(reorganized.astype(np.uint8),
                               log_part_blocks,
                               log_invpart_blocks,10)
    best_parts = np.argmax(coded,2)
    # the amount of spreading to do is governed by the size of the part features
    spread = swp.spread_waliji_patches(best_parts,
                                       log_part_blocks.shape[1],
                                       log_part_blocks.shape[2],
                                       log_part_blocks.shape[0])
    return collapse_to_grid(spread,log_part_blocks.shape[1],
                            log_part_blocks.shape[2])
offset=3
# Sweep over candidate lower frequency cutoffs, caching spectrograms and
# edge maps for the first 400 utterances to tmp_data_path.
# NOTE(review): this is Python 2 code (print statements); tmp_data_path,
# s_fnames, and the spectrogram globals are defined outside this view.
for lower_cutoff in [10,30,40,50,60]:
    print "lower_cutoff = %d" %lower_cutoff
    # accumulators for this cutoff
    # NOTE(review): bps, spec_windows, and s_idx_list are initialized but
    # never used in the visible span -- the chunk is likely truncated.
    bps = np.zeros((0,40,5),dtype=np.uint8)
    spec_windows = np.zeros((0,6,6))
    # guide file records which utterance index maps to which source file
    # NOTE(review): f is never closed in the visible span -- confirm a
    # close() follows, or wrap in a with-block in the full source.
    f = open(tmp_data_path+'guide_to_tmp_data_%d.txt' %lower_cutoff,'w')
    s_idx_list = []
    for s_idx, s_fname in enumerate(s_fnames[:400]):
        print "s_idx = %d" %s_idx
        f.write(str(s_idx)+'\t'+s_fname+'\n')
        s = np.load(s_fname)
        S = esp.get_spectrogram_features(s,
                                         sample_rate,
                                         num_window_samples,
                                         num_window_step_samples,
                                         fft_length,
                                         freq_cutoff,
                                         kernel_length)
        # cache the spectrogram only on the first cutoff pass, since S does
        # not depend on lower_cutoff
        if lower_cutoff == 10:
            np.save(tmp_data_path+str(s_idx)+'S.npy',S)
        E, edge_feature_row_breaks,\
            edge_orientations = esp._edge_map_no_threshold(S)
        # in-place thresholding of the edge map (block 20, spread 1)
        esp._edge_map_threshold_segments(E,
                                         20,
                                         1,
                                         threshold=.7,
                                         edge_orientations = edge_orientations,
                                         edge_feature_row_breaks = edge_feature_row_breaks)
        # likewise the thresholded edge map is cached once
        if lower_cutoff == 10:
            np.save(tmp_data_path+str(s_idx)+'E.npy',E)
def main(args):
    """
    For each label and component constructed a positive and negative
    training set and train a linear SVM to separate them.

    Pipeline (as visible here): extract edge-feature patches from the wav
    files listed in args.fls_txt, fit a Bernoulli mixture model to the
    binary patches, save the mixture means that have sufficient support,
    cluster the underlying spectrogram patches, and optionally render the
    spectrogram clusters to args.viz_spec_parts.

    Fixes relative to the previous revision:
    - removed an unconditional `import pdb; pdb.set_trace()` that halted
      every run before the spectrogram clustering step;
    - removed bare `except:` handlers that dropped into the debugger
      (errors now propagate normally);
    - nrows computation now uses float division: under Python 2,
      `args.n_components/ncols` floor-divides first, making np.ceil a
      no-op and producing a plot grid too small to hold every component;
    - deleted a block of commented-out code.
    """
    config_d = configParserWrapper.load_settings(open(args.config,'r'))
    # accumulators below are kept for compatibility with the surrounding
    # code; not all are consumed in the visible portion of this function
    true_examples = []
    false_examples = []
    mean = 0
    total = 0
    num_less_than_eq = np.zeros(20)
    fls = np.loadtxt(args.fls_txt, dtype=str)
    all_X_patches = []
    all_S_patches = []
    htemp, dhtemp, ddhtemp, tttemp = fb.hermite_window(
        args.winsize,
        args.num_tapers,
        args.win_half_time_support)
    # spectrogram transform: fft length is the next power of two above the
    # window length; 16 kHz sample rate, 80-sample hop, 4 kHz cutoff
    run_transform = lambda x, winlength : esp.get_spectrogram_features(x,
                                 16000,
                                 winlength,
                                 80,
                                 2**(int(np.ceil(np.log2(winlength)))),
                                 4000,
                                 7,
                                 )
    X_patches = []
    S_patches = []
    for fl_id, fl_path in enumerate(fls):
        # cap the training set size
        if len(X_patches) > 100000: break
        S = run_transform(wavfile.read(fl_path)[1], args.winsize)
        if args.do_exp_weighted_divergence:
            Sold = S.copy()
            S *=np.exp(S)
        X = get_edge_features_use_config(S.T,config_d['EDGES'])
        cur_X_patches, cur_S_patches = get_maximal_patches(X,S,patch_radius=2)
        X_patches.extend(cur_X_patches)
        S_patches.extend(cur_S_patches)

    num_new_patches = len(X_patches)
    X = np.array(X_patches)
    S = np.array(S_patches)
    data_shape = X.shape[1:]
    # flatten each patch to a vector for the mixture model
    X = X.reshape(X.shape[0],np.prod(data_shape))
    bmm = bernoullimm.BernoulliMM(n_components=args.n_components,
                                  n_init= 50,
                                  n_iter= 500,
                                  random_state=0,
                                  verbose=args.v, tol=1e-6)
    bmm.fit(X)
    # keep only components with expected support above 30 patches
    use_means = bmm.predict_proba(X).sum(0) > 30
    print(use_means.sum())
    np.save(args.save_parts,
            bmm.means_.reshape(*( (bmm.n_components,)+data_shape))[use_means])
    # cluster the raw spectrogram patches under the fitted assignments
    S_shape = S.shape[1:]
    S_clusters = bmm.cluster_underlying_data(
        S.reshape(len(S),np.prod(S_shape)),X).reshape(
        *( (bmm.n_components,) + S_shape))[use_means]
    np.save(args.spec_save_parts,S_clusters)

    # near-square visualization grid; float division so the ceiling takes
    # effect under Python 2 and nrows*ncols >= n_components
    ncols = int(np.sqrt(args.n_components))
    nrows = int(np.ceil(float(args.n_components)/ncols))
    if args.viz_spec_parts is not None:
        plt.close('all')
        fig = plt.figure(1, (6, 6))
        grid = ImageGrid(fig, 111, # similar to subplot(111)
                         nrows_ncols = (nrows,ncols ),
                         axes_pad=0.001, # pad between axes in inch.
                         )
        for i in range(S_clusters.shape[0]):
            grid[i].imshow(S_clusters[i],cmap=cm.binary,interpolation='nearest')
            grid[i].spines['bottom'].set_color('red')
            grid[i].spines['top'].set_color('red')
            grid[i].spines['left'].set_color('red')
            grid[i].spines['right'].set_color('red')
            for a in grid[i].axis.values():
                a.toggle(all=False)
        # style the unused trailing cells of the grid the same way
        for i in range(S_clusters.shape[0],nrows*ncols):
            grid[i].spines['bottom'].set_color('red')
            grid[i].spines['top'].set_color('red')
            grid[i].spines['left'].set_color('red')
            grid[i].spines['right'].set_color('red')
            for a in grid[i].axis.values():
                a.toggle(all=False)
        plt.savefig('%s' % args.viz_spec_parts ,bbox_inches='tight')