def main(args): if args.v: print args print "Checking value of args.useDefaultParams=", args.useDefaultParams if args.useDefaultParams: if args.v: # retrieving params with function print "Using default parameters" ( sp, ep, root_path, utterances_path, file_indices, num_mix_params, test_path, train_path, train_example_lengths, train_file_indices, test_example_lengths, test_file_indices, ) = get_params(args) else: # need to change this to something else ( sp, ep, root_path, utterances_path, file_indices, num_mix_params, test_path, train_path, train_example_lengths, train_file_indices, test_example_lengths, test_file_indices, ) = get_params(args) file_indices = get_file_indices(args.fileIndicesPath, args.dataPath) if args.limitFileIndices > -1: if args.v: print "Limiting file indices to length %d" % args.limitFileIndices file_indices = file_indices[: args.limitFileIndices] elif args.v: print "No limit on file indices" if args.partsPath != "": if args.v: print "Loading in parts from %s" % args.partsPath EParts = np.clip(np.load(args.partsPath), 0.01, 0.99) logParts = np.log(EParts).astype(np.float64) logInvParts = np.log(1 - EParts).astype(np.float64) if args.printEdgeDistribution: edge_distribution = getEdgeDistribution(file_indices, args.hw, file_indices_chunks=args.file_indices_chunks) elif args.edgeQuantileComparison != "": for fl_id, fl in enumerate(file_indices): if args.v: print fl_id utterance = gtrd.makeUtterance(args.dataPath, fl) print sp, args.mel_smoothing_kernel S = gtrd.get_spectrogram(utterance.s, sp, mel_smoothing_kernel=args.mel_smoothing_kernel) E, edge_feature_row_breaks, edge_orientations = esp._edge_map_no_threshold(S.T) E2 = np.empty((E.shape[0] / 8, E.shape[1], 8)) for i in xrange(8): E2[:, :, i] = E[E.shape[0] / 8 * i : E.shape[0] / 8 * (i + 1), :] print E2.shape, S.shape visualize_spec_and_quantiles( "%s_%d" % (args.edgeQuantileComparison, fl_id), E2, S, args.quantileSet, args.blockLengthSet ) elif args.createBackgroundMixture > 0: pass elif args.getUnclippedBackground != "": # initialize the background if args.v: print "Initializing average background to be computed over parts" avg_bgd = gtrd.AverageBackground() for fl_id, fl in enumerate(file_indices): if args.v: print fl_id utterance = gtrd.makeUtterance(args.dataPath, fl) print sp, args.mel_smoothing_kernel S = gtrd.get_spectrogram(utterance.s, sp, mel_smoothing_kernel=args.mel_smoothing_kernel) E = gtrd.get_edge_features(S.T, ep, verbose=False) if args.seeBackgroundEstimatePlots != "": visualize_edge_on_specs("%s_%d.png" % (args.seeBackgroundEstimatePlots, fl_id), E, S) out = code_parts.code_parts(E.astype(np.uint8), logParts, logInvParts, args.bernsteinEdgeThreshold) max_responses = np.argmax(out, -1) if args.bernsteinPreSpreadVisualizeOnSpec != "": # cycle over all parts for part_id in xrange(logParts.shape[0]): visualize_bern_on_specs( "%s_%d_%d.png" % (args.bernsteinPreSpreadVisualizeOnSpec, fl_id, part_id), max_responses, S, part_id, ) bin_out_map = code_parts.spread_patches(max_responses, 2, 2, out.shape[-1] - 1) avg_bgd.add_frames(bin_out_map, time_axis=0) np.save(args.getUnclippedBackground, avg_bgd.E) else: pass
# # need to access the files where we perform the estimation # utterances_path = '/home/mark/Template-Speech-Recognition/Data/Train/' file_indices = gtrd.get_data_files_indices(utterances_path) inner_mask = np.zeros((5,5),dtype=np.uint8) inner_mask[1:-1,1:-1] = 1 all_patches = np.zeros((0,5,5,8),dtype=np.uint8) all_S_patches = np.zeros((0,5,5),dtype=np.float32) inner_thresh = 9 outer_thresh = 40 for fl_idx,fl in enumerate(file_indices[:50]): utterance = gtrd.makeUtterance(utterances_path,fl) S = gtrd.get_spectrogram(utterance.s,sp) E = gtrd.get_edge_features(S.T,ep,verbose=False) patch_set, S_patch_set, patch_counts, patch_locs = cp.get_parts_mask(E.astype(np.uint8), inner_mask, S.astype(np.float32), inner_thresh) use_patch_ids = patch_set.sum(-1).sum(-1).sum(-1) >= outer_thresh all_patches = np.vstack( (all_patches, patch_set[use_patch_ids ])) all_S_patches = np.vstack( (all_S_patches, S_patch_set[use_patch_ids].astype(np.float32))) fl = file_indices[49]