def main():
    """Build TRN/VAL/TST input providers, train the model, and predict on TST.

    Relies on module-level configuration (h5 file paths, callid lists,
    context settings, training hyper-parameters, ``activity_dir``) and on
    the module-level helpers ``ip``, ``ku``, ``d`` and ``get_model``.
    All outputs are written under ``activity_dir``.
    """
    print("\n", "/" * 120, "\n")
    print(d.now())
    print("\nOUTPUTS DIRECTORY:\n{}\n".format(activity_dir))

    # Input providers for each split; TRN is the only shuffled, multi-pass one.
    print("\nTRN H5:\n{}\n".format(trn_h5))
    trn_ip = ip.for_callids(
        trn_h5,
        callids=trn_callids,
        data_context=dctx,
        add_channel_at_end=add_channel_dim,
        label_subcontext=lctx,
        label_from_subcontext_fn=lctx_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=trn_class_subsampling,
        shuffle_seed=trn_shuffle_seed,
        npasses=trn_passes,
    )
    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}".
        format("TRN", trn_ip.totlen, trn_ip.nchunks, trn_ip.steps_per_pass,
               trn_ip.npasses))
    print("data shape: {}; label shape: {}".format(trn_ip.inputdatashape,
                                                   trn_ip.inputlabelshape))

    print("\nVAL H5:\n{}\n".format(val_h5))
    val_ip = ip.for_callids(
        val_h5,
        callids=val_callids,
        data_context=dctx,
        add_channel_at_end=add_channel_dim,
        label_subcontext=lctx,
        label_from_subcontext_fn=lctx_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=val_class_subsampling,
        shuffle_seed=None,  # never shuffled
        npasses=1,
    )
    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}".
        format("VAL", val_ip.totlen, val_ip.nchunks, val_ip.steps_per_pass,
               val_ip.npasses))
    print("data shape: {}; label shape: {}".format(val_ip.inputdatashape,
                                                   val_ip.inputlabelshape))

    print("\nTST H5:\n{}\n".format(tst_h5))
    tst_ip = ip.for_callids(
        tst_h5,
        callids=tst_callids,
        data_context=dctx,
        add_channel_at_end=add_channel_dim,
        label_subcontext=lctx,
        label_from_subcontext_fn=lctx_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=tst_class_subsampling,
        shuffle_seed=None,  # never shuffled
        npasses=1,
    )
    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}".
        format("TST", tst_ip.totlen, tst_ip.nchunks, tst_ip.steps_per_pass,
               tst_ip.npasses))
    print("data shape: {}; label shape: {}".format(tst_ip.inputdatashape,
                                                   tst_ip.inputlabelshape))

    # Generators for keras; the training stream repeats indefinitely while
    # fit_generator counts steps.
    trn_gen = trn_ip.flow(
        indefinitely=True,
        only_labels=False,
        with_chunking=False,
    )
    nepochs = epochs_per_pass * trn_passes
    # FIX: guard against trn_passes == 0 — nepochs would be 0 and the
    # division below raised ZeroDivisionError.  Mirrors the sibling main()
    # in this file, which uses the same guard and then skips training.
    steps_per_epoch = ((trn_passes * trn_ip.steps_per_pass) // nepochs
                       if nepochs != 0 else 0)
    val_gen = val_ip.flow(
        indefinitely=True,
        only_labels=False,
        with_chunking=False,
    )
    validation_steps = val_ip.steps_per_pass
    callbacks = ku.create_callbacks(
        val_ip,
        activity_dir,
        epochs_per_pass,
        verbose=verbose == 1,
        pickle_safe=pickle_safe,
        max_q_size=max_q_size)
    input_shape = trn_ip.inputdatashape
    model = get_model(input_shape)

    print("\n", "/" * 120, "\n")
    print("MODEL SUMMARY")
    model.summary()

    # Skip training entirely when no passes were requested (steps == 0);
    # prediction on the test split still runs below.
    if steps_per_epoch != 0:
        print("\n", "/" * 120, "\n")
        print("TRAINING BEGINS\n")
        # NOTE(review): fit_generator with pickle_safe/max_q_size is the
        # Keras 1.x calling convention — confirm against the installed keras.
        model.fit_generator(
            trn_gen,
            steps_per_epoch=steps_per_epoch,
            epochs=nepochs,
            validation_data=val_gen,
            validation_steps=validation_steps,
            callbacks=callbacks,
            verbose=verbose,
            pickle_safe=pickle_safe,
            max_q_size=max_q_size,
            initial_epoch=initial_epoch,
        )
        print("\nTRAINING ENDED")
        print(d.now())

    print("\n", "/" * 120, "\n")
    print("PREDICTING ON TEST")
    ku.predict_on_inputs_provider(
        model,
        tst_ip,
        activity_dir,
    )
    print("\nDONE")
    print(d.now())
def main():
    """Build normalized TRN/VAL/TST input providers, train, and predict on TST.

    Variant of the driver that passes ``mean_it``/``std_it`` normalization
    flags to the providers and loads Viterbi priors for test-time decoding.
    Relies on module-level configuration (h5 paths, callid lists, ``norm``,
    ``sub``, hyper-parameters, ``activity_dir``) and the module-level
    helpers ``ip``, ``ku``, ``d``, ``get_model``,
    ``read_normalized_viterbi_priors`` and ``predict_on_inputs_provider``.
    """
    # NOTE(review): indentation reconstructed from a collapsed source; the
    # statement grouping below (esp. the training guard) is the most
    # plausible reading — confirm against the original file.
    print("\n", "/" * 120, "\n")
    print(d.now())
    # Banner describing the chosen normalization and silence-subsampling
    # modes; raises KeyError if norm/sub hold an unexpected value.
    print(
        "\n\nCHUNKWISE", {
            "no-n": "NON-NORMALIZED",
            "m-n": "MEAN-NORMALIZED",
            "mv-n": "MEAN-VARIANCE-NORMALIZED",
        }[norm], {
            "keepzero": "WITH-SILENCE",
            "skipzero": "WITHOUT-SILENCE",
            "skipzero-20one": "WITHOUT-SILENCE-SUBSAMPLED-SINGLE-SPEECH",
        }[sub], "\n\n")
    print("\nOUTPUTS DIRECTORY:\n{}\n".format(activity_dir))

    # Create input providers and shout a bunch of things ######################
    print("\nTRN H5:\n{}\n".format(trn_h5))
    # Training provider: shuffled and multi-pass, with class subsampling.
    trn_ip = ip.for_callids(
        trn_h5,
        callids=trn_callids,
        data_context=data_context,
        add_channel_at_end=add_channel_dim,
        label_subcontext=label_subcontext,
        label_from_subcontext_fn=label_subcontext_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=trn_class_subsampling,
        shuffle_seed=trn_shuffle_seed,
        npasses=trn_passes,
        mean_it=mean_it,  # presumably toggles mean normalization — confirm in ip
        std_it=std_it,  # presumably toggles variance normalization — confirm
    )
    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}"
        .format("TRN", trn_ip.totlen, trn_ip.nchunks, trn_ip.steps_per_pass,
                trn_ip.npasses))
    print("data shape: {}; label shape: {}".format(trn_ip.inputdatashape,
                                                   trn_ip.inputlabelshape))

    print("\nVAL H5:\n{}\n".format(val_h5))
    # Validation provider: deterministic order, exactly one pass.
    val_ip = ip.for_callids(
        val_h5,
        callids=val_callids,
        data_context=data_context,
        add_channel_at_end=add_channel_dim,
        label_subcontext=label_subcontext,
        label_from_subcontext_fn=label_subcontext_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=val_class_subsampling,
        shuffle_seed=None,  # never shuffled
        npasses=1,
        mean_it=mean_it,
        std_it=std_it,
    )
    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}"
        .format("VAL", val_ip.totlen, val_ip.nchunks, val_ip.steps_per_pass,
                val_ip.npasses))
    print("data shape: {}; label shape: {}".format(val_ip.inputdatashape,
                                                   val_ip.inputlabelshape))

    print("\nTST H5:\n{}\n".format(tst_h5))
    # Test provider: deterministic order, exactly one pass.
    tst_ip = ip.for_callids(
        tst_h5,
        callids=tst_callids,
        data_context=data_context,
        add_channel_at_end=add_channel_dim,
        label_subcontext=label_subcontext,
        label_from_subcontext_fn=label_subcontext_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=tst_class_subsampling,
        shuffle_seed=None,  # never shuffled
        npasses=1,
        mean_it=mean_it,
        std_it=std_it,
    )
    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}"
        .format("TST", tst_ip.totlen, tst_ip.nchunks, tst_ip.steps_per_pass,
                tst_ip.npasses))
    print("data shape: {}; label shape: {}".format(tst_ip.inputdatashape,
                                                   tst_ip.inputlabelshape))

    # Initial and transition priors for Viterbi decoding at prediction time.
    init, tran = read_normalized_viterbi_priors()
    print("GOT VITERBI PRIORS")

    # Setup stuff for training with keras #####################################
    trn_gen = trn_ip.flow(
        indefinitely=True,  # stream repeats; fit_generator counts steps
        only_labels=False,
        with_chunking=False,
    )
    nepochs = epochs_per_pass * trn_passes
    # Guard: trn_passes == 0 would make nepochs 0 and the division undefined,
    # so steps_per_epoch is forced to 0 (training is skipped below).
    steps_per_epoch = (trn_passes * trn_ip.steps_per_pass
                       ) // nepochs if trn_passes != 0 else 0
    val_gen = val_ip.flow(
        indefinitely=True,
        only_labels=False,
        with_chunking=False,
    )
    validation_steps = val_ip.steps_per_pass
    callbacks = ku.create_callbacks(
        val_ip,
        activity_dir,
        epochs_per_pass,
        verbose=verbose == 1,  # callbacks verbose only at verbosity level 1
        pickle_safe=pickle_safe,
        max_q_size=max_q_size)
    input_shape = trn_ip.inputdatashape
    model = get_model(input_shape)

    print("\n", "/" * 120, "\n")
    print("MODEL SUMMARY")
    model.summary()

    # Train only when there is at least one step; prediction runs regardless.
    if steps_per_epoch != 0:
        print("\n", "/" * 120, "\n")
        print("TRAINING BEGINS\n")
        # NOTE(review): pickle_safe/max_q_size are the Keras 1.x
        # fit_generator keywords — confirm against the installed keras.
        model.fit_generator(
            trn_gen,
            steps_per_epoch=steps_per_epoch,
            epochs=nepochs,
            validation_data=val_gen,
            validation_steps=validation_steps,
            callbacks=callbacks,
            verbose=verbose,
            pickle_safe=pickle_safe,
            max_q_size=max_q_size,
            initial_epoch=initial_epoch,
        )
        print("\nTRAINING ENDED")
        print(d.now())

    print("\n", "/" * 120, "\n")
    print("PREDICTING ON TEST")
    # Confidences for the test split are exported to an h5 in activity_dir;
    # init/tran feed the Viterbi smoothing inside the predictor.
    export_to = os.path.join(activity_dir, "confs.test.h5")
    predict_on_inputs_provider(model, tst_ip, export_to, init, tran)
    print("\nDONE")
    print(d.now())