def compute_edge_features(x, config_d):
    """
    Compute the thresholded edge feature array for a raw signal.

    The signal is turned into a spectrogram, an un-thresholded edge map is
    computed on the transposed spectrogram, the edge map is thresholded
    segment by segment, and the result is reorganized with
    reorg_part_for_fast_filtering and cast to np.uint8.

    Parameters:
    ===========
    x: array-like with an ``astype`` method
        Raw signal.  It is converted to float and divided by 2**15-1
        (presumably 16-bit integer samples -- TODO confirm).
    config_d: dict
        Needs a 'SPECTROGRAM' entry with the keys sample_rate,
        num_window_samples, num_window_step_samples, fft_length,
        freq_cutoff, kernel_length, preemphasis, no_use_dpss and
        do_freq_smoothing, and an 'EDGES' entry with the keys
        block_length, spread_length, threshold and abst_threshold.

    Returns:
    ========
    The output of reorg_part_for_fast_filtering applied to the
    thresholded edge map, cast to np.uint8.
    """
    scaled_signal = x.astype(float) / (2**15 - 1)

    spec_cfg = config_d['SPECTROGRAM']
    # The two mapping outputs are requested (return_sample_mapping=True)
    # but are not used any further in this function.
    spectrogram, _sample_mapping, _sample_to_frames = \
        esp.get_spectrogram_features(
            scaled_signal,
            spec_cfg['sample_rate'],
            spec_cfg['num_window_samples'],
            spec_cfg['num_window_step_samples'],
            spec_cfg['fft_length'],
            spec_cfg['freq_cutoff'],
            spec_cfg['kernel_length'],
            preemph=spec_cfg['preemphasis'],
            no_use_dpss=spec_cfg['no_use_dpss'],
            do_freq_smoothing=spec_cfg['do_freq_smoothing'],
            return_sample_mapping=True)

    edge_map, row_breaks, orientations = \
        esp._edge_map_no_threshold(spectrogram.T)

    edge_cfg = config_d['EDGES']
    # The return value is ignored, so this call presumably thresholds
    # edge_map in place -- confirm against esp.
    esp._edge_map_threshold_segments(
        edge_map,
        edge_cfg['block_length'],
        edge_cfg['spread_length'],
        threshold=edge_cfg['threshold'],
        edge_orientations=orientations,
        edge_feature_row_breaks=row_breaks,
        abst_threshold=edge_cfg['abst_threshold'],
        verbose=False)

    reorganized = reorg_part_for_fast_filtering(edge_map)
    return reorganized.astype(np.uint8)
def get_waliji_feature_map(s,
                           log_part_blocks,
                           log_invpart_blocks,
                           abst_threshold=np.array([.025,.025,.015,.015,
                                                    .02,.02,.02,.02]),
                           spread_length=3,
                           fft_length=512,
                           num_window_step_samples=80,
                           freq_cutoff=3000,
                           sample_rate=16000,
                           num_window_samples=320,
                           kernel_length=7):
    """
    Code a raw signal with the given parts and collapse the result to a
    grid.

    Usually only the signal s changes between calls; the remaining
    parameters keep their defaults.

    Parameters:
    ===========
    s: np.ndarray[ndim=1]
        Raw signal data that the features are extracted from
    log_part_blocks: np.ndarray[ndim=4,dtype=np.float32]
        Log part probabilities; the first dimension runs over the
        different features
    log_invpart_blocks: np.ndarray[ndim=4,dtype=np.float32]
        Companion array of log_part_blocks, equal to
        np.log(1-np.exp(log_part_blocks))
    abst_threshold, spread_length:
        NOTE: accepted but not referenced anywhere in the body; the
        thresholding call below uses the literals 40, 1 and threshold=.7.
    fft_length, num_window_step_samples, freq_cutoff, sample_rate,
    num_window_samples, kernel_length:
        Forwarded to esp.get_spectrogram_features

    Returns:
    ========
    The result of collapse_to_grid on the spread part map.
    """
    spectrogram = esp.get_spectrogram_features(
        s,
        sample_rate,
        num_window_samples,
        num_window_step_samples,
        fft_length,
        freq_cutoff,
        kernel_length)

    edge_map, row_breaks, orientations = \
        esp._edge_map_no_threshold(spectrogram)
    # Return value is ignored, so edge_map is presumably thresholded in
    # place -- confirm against esp.
    esp._edge_map_threshold_segments(
        edge_map,
        40,
        1,
        threshold=.7,
        edge_orientations=orientations,
        edge_feature_row_breaks=row_breaks)
    edge_map = reorg_part_for_fast_filtering(edge_map)

    part_scores = cp.code_parts_fast(edge_map.astype(np.uint8),
                                     log_part_blocks,
                                     log_invpart_blocks,
                                     10)
    # keep, per location, the index that maximizes the score along axis 2
    best_part = np.argmax(part_scores, 2)

    # the amount of spreading to do is governed by the size of the part
    # features
    part_dim1 = log_part_blocks.shape[1]
    part_dim2 = log_part_blocks.shape[2]
    num_features = log_part_blocks.shape[0]
    spread_map = swp.spread_waliji_patches(best_part,
                                           part_dim1,
                                           part_dim2,
                                           num_features)
    return collapse_to_grid(spread_map, part_dim1, part_dim2)
s_idx_list = [] for s_idx, s_fname in enumerate(s_fnames[:400]): print "s_idx = %d" %s_idx f.write(str(s_idx)+'\t'+s_fname+'\n') s = np.load(s_fname) S = esp.get_spectrogram_features(s, sample_rate, num_window_samples, num_window_step_samples, fft_length, freq_cutoff, kernel_length) if lower_cutoff == 10: np.save(tmp_data_path+str(s_idx)+'S.npy',S) E, edge_feature_row_breaks,\ edge_orientations = esp._edge_map_no_threshold(S) esp._edge_map_threshold_segments(E, 20, 1, threshold=.7, edge_orientations = edge_orientations, edge_feature_row_breaks = edge_feature_row_breaks) if lower_cutoff == 10: np.save(tmp_data_path+str(s_idx)+'E.npy',E) patch_width = 5 patch_height = 5 upper_cutoff = 200 bp,all_patch_rows,all_patch_cols = elf.extract_local_features_tied(E,patch_height, patch_width, lower_cutoff, upper_cutoff, edge_feature_row_breaks,
def main(args):
    """
    Run one of the edge / part background analyses selected by ``args``.

    Steps:
      1. Retrieve the parameters with get_params(args).
      2. Build the list of file indices with get_file_indices, optionally
         truncated to args.limitFileIndices entries.
      3. If args.partsPath is non-empty, load the parts, clip them to
         [0.01, 0.99] and compute their log and log-complement.
      4. Run the first mode whose switch is set, in this order:
         args.printEdgeDistribution, args.edgeQuantileComparison,
         args.createBackgroundMixture (currently a no-op),
         args.getUnclippedBackground.  If none is set nothing else is
         done.

    Parameters:
    ===========
    args: namespace-like object
        Attributes read here: v, useDefaultParams, fileIndicesPath,
        dataPath, limitFileIndices, partsPath, printEdgeDistribution, hw,
        file_indices_chunks, edgeQuantileComparison, mel_smoothing_kernel,
        quantileSet, blockLengthSet, createBackgroundMixture,
        getUnclippedBackground, seeBackgroundEstimatePlots,
        bernsteinEdgeThreshold, bernsteinPreSpreadVisualizeOnSpec.

    Raises:
    =======
    ValueError
        If args.getUnclippedBackground is set while args.partsPath is
        empty: that mode codes the edges with the loaded parts.

    All prints use a single pre-formatted argument so the text written is
    the same whether ``print`` is the Python 2 statement or the function.
    """
    if args.v:
        print(args)
        print("Checking value of args.useDefaultParams= %s"
              % (args.useDefaultParams,))

    if args.useDefaultParams and args.v:
        print("Using default parameters")
    # TODO: the non-default case needs to change to something else; for
    # now both cases retrieve the params with the same function.
    (
        sp,
        ep,
        root_path,
        utterances_path,
        file_indices,
        num_mix_params,
        test_path,
        train_path,
        train_example_lengths,
        train_file_indices,
        test_example_lengths,
        test_file_indices,
    ) = get_params(args)

    # NOTE(review): this replaces the file_indices returned by get_params
    # -- confirm that is intended.
    file_indices = get_file_indices(args.fileIndicesPath, args.dataPath)

    if args.limitFileIndices > -1:
        if args.v:
            print("Limiting file indices to length %d"
                  % args.limitFileIndices)
        file_indices = file_indices[:args.limitFileIndices]
    elif args.v:
        print("No limit on file indices")

    # Stay None unless a parts file is given, so the mode that needs them
    # can fail with a clear message instead of an unbound-name error.
    logParts = None
    logInvParts = None
    if args.partsPath != "":
        if args.v:
            print("Loading in parts from %s" % args.partsPath)
        # clip away from 0 and 1 so that both logarithms are finite
        EParts = np.clip(np.load(args.partsPath), 0.01, 0.99)
        logParts = np.log(EParts).astype(np.float64)
        logInvParts = np.log(1 - EParts).astype(np.float64)

    if args.printEdgeDistribution:
        # the returned distribution is not used any further here
        getEdgeDistribution(file_indices, args.hw,
                            file_indices_chunks=args.file_indices_chunks)

    elif args.edgeQuantileComparison != "":
        for fl_id, fl in enumerate(file_indices):
            if args.v:
                print(fl_id)
            utterance = gtrd.makeUtterance(args.dataPath, fl)
            print("%s %s" % (sp, args.mel_smoothing_kernel))
            S = gtrd.get_spectrogram(
                utterance.s, sp,
                mel_smoothing_kernel=args.mel_smoothing_kernel)
            E, edge_feature_row_breaks, edge_orientations = \
                esp._edge_map_no_threshold(S.T)

            # E is treated as 8 equally sized row blocks stacked on top of
            # each other; move the block index to a trailing axis.  Floor
            # division keeps the block size an int, as required for array
            # shapes and slice bounds.
            block_rows = E.shape[0] // 8
            E2 = np.empty((block_rows, E.shape[1], 8))
            for i in range(8):
                E2[:, :, i] = E[block_rows * i:block_rows * (i + 1), :]
            print("%s %s" % (E2.shape, S.shape))

            visualize_spec_and_quantiles(
                "%s_%d" % (args.edgeQuantileComparison, fl_id),
                E2, S, args.quantileSet, args.blockLengthSet)

    elif args.createBackgroundMixture > 0:
        # Not implemented; kept in the chain because setting it still
        # prevents the getUnclippedBackground mode below from running.
        pass

    elif args.getUnclippedBackground != "":
        if logParts is None:
            raise ValueError(
                "args.partsPath must be set when "
                "args.getUnclippedBackground is used")

        # initialize the background
        if args.v:
            print("Initializing average background to be computed "
                  "over parts")
        avg_bgd = gtrd.AverageBackground()

        for fl_id, fl in enumerate(file_indices):
            if args.v:
                print(fl_id)
            utterance = gtrd.makeUtterance(args.dataPath, fl)
            print("%s %s" % (sp, args.mel_smoothing_kernel))
            S = gtrd.get_spectrogram(
                utterance.s, sp,
                mel_smoothing_kernel=args.mel_smoothing_kernel)
            E = gtrd.get_edge_features(S.T, ep, verbose=False)

            if args.seeBackgroundEstimatePlots != "":
                visualize_edge_on_specs(
                    "%s_%d.png" % (args.seeBackgroundEstimatePlots, fl_id),
                    E, S)

            out = code_parts.code_parts(E.astype(np.uint8),
                                        logParts, logInvParts,
                                        args.bernsteinEdgeThreshold)
            max_responses = np.argmax(out, -1)

            if args.bernsteinPreSpreadVisualizeOnSpec != "":
                # cycle over all parts
                for part_id in range(logParts.shape[0]):
                    visualize_bern_on_specs(
                        "%s_%d_%d.png"
                        % (args.bernsteinPreSpreadVisualizeOnSpec,
                           fl_id, part_id),
                        max_responses, S, part_id)

            bin_out_map = code_parts.spread_patches(max_responses, 2, 2,
                                                    out.shape[-1] - 1)
            avg_bgd.add_frames(bin_out_map, time_axis=0)

        np.save(args.getUnclippedBackground, avg_bgd.E)