def main(args):
    """Command-line driver.

    Loads the experiment parameters and then dispatches on the requested
    action: printing the edge distribution, edge-quantile comparison plots,
    background-mixture creation (currently a no-op), or estimating the
    unclipped average background over part activations.

    NOTE(review): the original source had collapsed/lost indentation; the
    block structure below is a best-effort reconstruction of the same
    tokens -- confirm ambiguous placements against version control.
    """
    if args.v:
        print args
    print "Checking value of args.useDefaultParams=", args.useDefaultParams
    if args.useDefaultParams:
        if args.v:
            # retrieving params with function
            print "Using default parameters"
        (sp,
         ep,
         root_path,
         utterances_path,
         file_indices,
         num_mix_params,
         test_path,
         train_path,
         train_example_lengths,
         train_file_indices,
         test_example_lengths,
         test_file_indices) = get_params(args)
    else:
        # need to change this to something else
        # NOTE(review): both branches currently call get_params(args)
        # identically; this branch additionally re-reads file_indices from
        # args.fileIndicesPath (placement inside the else reconstructed --
        # confirm).
        (sp,
         ep,
         root_path,
         utterances_path,
         file_indices,
         num_mix_params,
         test_path,
         train_path,
         train_example_lengths,
         train_file_indices,
         test_example_lengths,
         test_file_indices) = get_params(args)
        file_indices = get_file_indices(args.fileIndicesPath, args.dataPath)
    if args.limitFileIndices > -1:
        if args.v:
            print "Limiting file indices to length %d" % args.limitFileIndices
        file_indices = file_indices[: args.limitFileIndices]
    elif args.v:
        print "No limit on file indices"
    if args.partsPath != "":
        # Load part templates; clipping the probabilities away from 0 and 1
        # keeps the logs below finite.
        if args.v:
            print "Loading in parts from %s" % args.partsPath
        EParts = np.clip(np.load(args.partsPath), 0.01, 0.99)
        logParts = np.log(EParts).astype(np.float64)
        logInvParts = np.log(1 - EParts).astype(np.float64)
    if args.printEdgeDistribution:
        edge_distribution = getEdgeDistribution(file_indices, args.hw, file_indices_chunks=args.file_indices_chunks)
    elif args.edgeQuantileComparison != "":
        # Visualize spectrogram vs. edge-quantile maps for each utterance.
        for fl_id, fl in enumerate(file_indices):
            if args.v:
                print fl_id
            utterance = gtrd.makeUtterance(args.dataPath, fl)
            print sp, args.mel_smoothing_kernel
            S = gtrd.get_spectrogram(utterance.s, sp, mel_smoothing_kernel=args.mel_smoothing_kernel)
            E, edge_feature_row_breaks, edge_orientations = esp._edge_map_no_threshold(S.T)
            # Split the vertically stacked edge map into 8 orientation
            # channels (Python 2 integer division on E.shape[0] / 8).
            E2 = np.empty((E.shape[0] / 8, E.shape[1], 8))
            for i in xrange(8):
                E2[:, :, i] = E[E.shape[0] / 8 * i : E.shape[0] / 8 * (i + 1), :]
            print E2.shape, S.shape
            visualize_spec_and_quantiles(
                "%s_%d" % (args.edgeQuantileComparison, fl_id), E2, S, args.quantileSet, args.blockLengthSet
            )
    elif args.createBackgroundMixture > 0:
        pass
    elif args.getUnclippedBackground != "":
        # initialize the background
        if args.v:
            print "Initializing average background to be computed over parts"
        avg_bgd = gtrd.AverageBackground()
        for fl_id, fl in enumerate(file_indices):
            if args.v:
                print fl_id
            utterance = gtrd.makeUtterance(args.dataPath, fl)
            print sp, args.mel_smoothing_kernel
            S = gtrd.get_spectrogram(utterance.s, sp, mel_smoothing_kernel=args.mel_smoothing_kernel)
            E = gtrd.get_edge_features(S.T, ep, verbose=False)
            if args.seeBackgroundEstimatePlots != "":
                visualize_edge_on_specs("%s_%d.png" % (args.seeBackgroundEstimatePlots, fl_id), E, S)
            # NOTE(review): logParts/logInvParts are only bound when
            # args.partsPath != "" above -- taking this branch without
            # --partsPath raises NameError; confirm intended flag coupling.
            out = code_parts.code_parts(E.astype(np.uint8), logParts, logInvParts, args.bernsteinEdgeThreshold)
            max_responses = np.argmax(out, -1)
            if args.bernsteinPreSpreadVisualizeOnSpec != "":
                # cycle over all parts
                for part_id in xrange(logParts.shape[0]):
                    visualize_bern_on_specs(
                        "%s_%d_%d.png" % (args.bernsteinPreSpreadVisualizeOnSpec, fl_id, part_id),
                        max_responses,
                        S,
                        part_id,
                    )
            bin_out_map = code_parts.spread_patches(max_responses, 2, 2, out.shape[-1] - 1)
            avg_bgd.add_frames(bin_out_map, time_axis=0)
        # NOTE(review): save placed after the loop (original indentation
        # lost); saving inside the loop would only redundantly overwrite
        # the same file each iteration.
        np.save(args.getUnclippedBackground, avg_bgd.E)
    else:
        pass
def save_syllable_features_to_data_dir( args, phn_tuple, utterances_path, file_indices, sp, ep, phn_mapping, tag_data_with_syllable_string=False, save_tag="train", waveform_offset=10, block_features=False, ): """ Wrapper function to get all the examples processed """ print "Collecting the data for phn_tuple " + " ".join("%s" % k for k in phn_tuple) syllable_string = "_".join(p for p in phn_tuple) phn_features, avg_bgd = gtrd.get_syllable_features_directory( utterances_path, file_indices, phn_tuple, S_config=sp, E_config=ep, offset=0, E_verbose=False, return_avg_bgd=True, waveform_offset=15, phn_mapping=phn_mapping, ) bgd = np.clip(avg_bgd.E, 0.01, 0.4) np.save("data/bgd.npy", bgd) example_mat = gtrd.recover_example_map(phn_features) lengths, waveforms = gtrd.recover_waveforms(phn_features, example_mat) if tag_data_with_syllable_string: np.savez( "data/%s_waveforms_lengths_%s.npz" % (syllable_string, save_tag), waveforms=waveforms, lengths=lengths, example_mat=example_mat, ) else: np.savez( "data/waveforms_lengths_%s.npz" % save_tag, waveforms=waveforms, lengths=lengths, example_mat=example_mat ) Slengths, Ss = gtrd.recover_specs(phn_features, example_mat) Ss = Ss.astype(np.float32) if tag_data_with_syllable_string: np.savez( "data/%s_Ss_lengths_%s.npz" % (syllable_string, save_tag), Ss=Ss, Slengths=Slengths, example_mat=example_mat ) else: np.savez("data/Ss_lengths_%s.npz" % (save_tag), Ss=Ss, Slengths=Slengths, example_mat=example_mat) Elengths, Es = gtrd.recover_edgemaps(phn_features, example_mat, bgd=bgd) Es = Es.astype(np.uint8) if tag_data_with_syllable_string: np.savez( "data/%s_Es_lengths_%s.npz" % (syllable_string, save_tag), Es=Es, Elengths=Elengths, example_mat=example_mat ) else: np.savez("data/Es_lengths_%s.npz" % (save_tag), Es=Es, Elengths=Elengths, example_mat=example_mat) if args.doBlockFeatures: out = code_parts.code_parts(E.astype(np.uint8), logParts, logInvParts, args.bernsteinEdgeThreshold) max_responses = np.argmax(out, -1) Bs = 
code_parts.spread_patches(max_responses, 2, 2, out.shape[-1] - 1)