Example 1
def main():
    print("\n", "/" * 120, "\n")
    print(d.now())
    print("\nOUTPUTS DIRECTORY:\n{}\n".format(activity_dir))

    print("\nTRN H5:\n{}\n".format(trn_h5))
    trn_ip = ip.for_callids(
        trn_h5,
        callids=trn_callids,
        data_context=dctx,
        add_channel_at_end=add_channel_dim,
        label_subcontext=lctx,
        label_from_subcontext_fn=lctx_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=trn_class_subsampling,
        shuffle_seed=trn_shuffle_seed,
        npasses=trn_passes, )

    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}".
        format("TRN", trn_ip.totlen, trn_ip.nchunks, trn_ip.steps_per_pass,
               trn_ip.npasses))
    print("data shape: {}; label shape: {}".format(trn_ip.inputdatashape,
                                                   trn_ip.inputlabelshape))

    print("\nVAL H5:\n{}\n".format(val_h5))
    val_ip = ip.for_callids(
        val_h5,
        callids=val_callids,
        data_context=dctx,
        add_channel_at_end=add_channel_dim,
        label_subcontext=lctx,
        label_from_subcontext_fn=lctx_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=val_class_subsampling,
        shuffle_seed=None,  # never shuffled
        npasses=1, )

    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}".
        format("VAL", val_ip.totlen, val_ip.nchunks, val_ip.steps_per_pass,
               val_ip.npasses))
    print("data shape: {}; label shape: {}".format(val_ip.inputdatashape,
                                                   val_ip.inputlabelshape))

    print("\nTST H5:\n{}\n".format(tst_h5))
    tst_ip = ip.for_callids(
        tst_h5,
        callids=tst_callids,
        data_context=dctx,
        add_channel_at_end=add_channel_dim,
        label_subcontext=lctx,
        label_from_subcontext_fn=lctx_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=tst_class_subsampling,
        shuffle_seed=None,  # never shuffled
        npasses=1, )

    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}".
        format("TST", tst_ip.totlen, tst_ip.nchunks, tst_ip.steps_per_pass,
               tst_ip.npasses))
    print("data shape: {}; label shape: {}".format(tst_ip.inputdatashape,
                                                   tst_ip.inputlabelshape))

    trn_gen = trn_ip.flow(
        indefinitely=True,
        only_labels=False,
        with_chunking=False, )
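    # Total training steps over all passes are split evenly across the
    # epochs_per_pass * trn_passes epochs that Keras will run.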
    nepochs = epochs_per_pass * trn_passes
    steps_per_epoch = (trn_passes * trn_ip.steps_per_pass) // nepochs

    val_gen = val_ip.flow(
        indefinitely=True,
        only_labels=False,
        with_chunking=False, )
    validation_steps = val_ip.steps_per_pass

    callbacks = ku.create_callbacks(
        val_ip,
        activity_dir,
        epochs_per_pass,
        verbose=verbose == 1,
        pickle_safe=pickle_safe,
        max_q_size=max_q_size)

    input_shape = trn_ip.inputdatashape
    model = get_model(input_shape)

    print("\n", "/" * 120, "\n")
    print("MODEL SUMMARY")
    model.summary()

    print("\n", "/" * 120, "\n")
    print("TRAINING BEGINS\n")
    model.fit_generator(
        trn_gen,
        steps_per_epoch=steps_per_epoch,
        epochs=nepochs,
        validation_data=val_gen,
        validation_steps=validation_steps,
        callbacks=callbacks,
        verbose=verbose,
        pickle_safe=pickle_safe,
        max_q_size=max_q_size,
        initial_epoch=initial_epoch, )

    print("\nTRAINING ENDED")
    print(d.now())

    print("\n", "/" * 120, "\n")
    print("PREDICTING ON TEST")
    ku.predict_on_inputs_provider(
        model,
        tst_ip,
        activity_dir, )

    print("\nDONE")
    print(d.now())
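
The snippet relies on module-level configuration (trn_h5, dctx, lctx, ku, ip, the subsampling ratios, and so on) defined elsewhere in the project and not shown here. In particular, get_model(input_shape) is never defined above. Below is only a minimal, hypothetical sketch of what such a builder could look like, assuming one-hot (categorical) labels as implied by classkeyfn=np.argmax; the architecture, the nclasses parameter, and the optimizer choice are illustrative assumptions, not the project's actual model.

# Hypothetical sketch only: the real get_model used by the example is not shown above.
from keras.models import Sequential
from keras.layers import Flatten, Dense

def get_model(input_shape, nclasses=2):
    model = Sequential()
    model.add(Flatten(input_shape=input_shape))       # collapse (context, features[, channel]) to a vector
    model.add(Dense(256, activation='relu'))
    model.add(Dense(nclasses, activation='softmax'))  # one-hot targets, matching classkeyfn=np.argmax
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model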
Example 2
def main():
    print("\n", "/" * 120, "\n")
    print(d.now())
    print(
        "\n\nCHUNKWISE", {
            "no-n": "NON-NORMALIZED",
            "m-n": "MEAN-NORMALIZED",
            "mv-n": "MEAN-VARIANCE-NORMALIZED",
        }[norm], {
            "keepzero": "WITH-SILENCE",
            "skipzero": "WITHOUT-SILENCE",
            "skipzero-20one": "WITHOUT-SILENCE-SUBSAMPLED-SINGLE-SPEECH",
        }[sub], "\n\n")
    print("\nOUTPUTS DIRECTORY:\n{}\n".format(activity_dir))

    # Create input providers and shout a bunch of things ######################
    print("\nTRN H5:\n{}\n".format(trn_h5))
    trn_ip = ip.for_callids(
        trn_h5,
        callids=trn_callids,
        data_context=data_context,
        add_channel_at_end=add_channel_dim,
        label_subcontext=label_subcontext,
        label_from_subcontext_fn=label_subcontext_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=trn_class_subsampling,
        shuffle_seed=trn_shuffle_seed,
        npasses=trn_passes,
        mean_it=mean_it,
        std_it=std_it,
    )

    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}"
        .format("TRN", trn_ip.totlen, trn_ip.nchunks, trn_ip.steps_per_pass,
                trn_ip.npasses))
    print("data shape: {}; label shape: {}".format(trn_ip.inputdatashape,
                                                   trn_ip.inputlabelshape))

    print("\nVAL H5:\n{}\n".format(val_h5))
    val_ip = ip.for_callids(
        val_h5,
        callids=val_callids,
        data_context=data_context,
        add_channel_at_end=add_channel_dim,
        label_subcontext=label_subcontext,
        label_from_subcontext_fn=label_subcontext_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=val_class_subsampling,
        shuffle_seed=None,  # never shuffled
        npasses=1,
        mean_it=mean_it,
        std_it=std_it,
    )

    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}"
        .format("VAL", val_ip.totlen, val_ip.nchunks, val_ip.steps_per_pass,
                val_ip.npasses))
    print("data shape: {}; label shape: {}".format(val_ip.inputdatashape,
                                                   val_ip.inputlabelshape))

    print("\nTST H5:\n{}\n".format(tst_h5))
    tst_ip = ip.for_callids(
        tst_h5,
        callids=tst_callids,
        data_context=data_context,
        add_channel_at_end=add_channel_dim,
        label_subcontext=label_subcontext,
        label_from_subcontext_fn=label_subcontext_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=tst_class_subsampling,
        shuffle_seed=None,  # never shuffled
        npasses=1,
        mean_it=mean_it,
        std_it=std_it,
    )

    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}"
        .format("TST", tst_ip.totlen, tst_ip.nchunks, tst_ip.steps_per_pass,
                tst_ip.npasses))
    print("data shape: {}; label shape: {}".format(tst_ip.inputdatashape,
                                                   tst_ip.inputlabelshape))

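    # Initial-state and transition priors, used when decoding the test
    # predictions in predict_on_inputs_provider below.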
    init, tran = read_normalized_viterbi_priors()
    print("GOT VITERBI PRIORS")
    # Setup stuff for training with keras #####################################
    trn_gen = trn_ip.flow(
        indefinitely=True,
        only_labels=False,
        with_chunking=False,
    )
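    # Total training steps are split evenly across epochs; trn_passes == 0
    # means a prediction-only run, so training is skipped further down.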
    nepochs = epochs_per_pass * trn_passes
    steps_per_epoch = (trn_passes * trn_ip.steps_per_pass
                       ) // nepochs if trn_passes != 0 else 0

    val_gen = val_ip.flow(
        indefinitely=True,
        only_labels=False,
        with_chunking=False,
    )
    validation_steps = val_ip.steps_per_pass

    callbacks = ku.create_callbacks(val_ip,
                                    activity_dir,
                                    epochs_per_pass,
                                    verbose=verbose == 1,
                                    pickle_safe=pickle_safe,
                                    max_q_size=max_q_size)

    input_shape = trn_ip.inputdatashape
    model = get_model(input_shape)

    print("\n", "/" * 120, "\n")
    print("MODEL SUMMARY")
    model.summary()

    if steps_per_epoch != 0:
        print("\n", "/" * 120, "\n")
        print("TRAINING BEGINS\n")
        model.fit_generator(
            trn_gen,
            steps_per_epoch=steps_per_epoch,
            epochs=nepochs,
            validation_data=val_gen,
            validation_steps=validation_steps,
            callbacks=callbacks,
            verbose=verbose,
            pickle_safe=pickle_safe,
            max_q_size=max_q_size,
            initial_epoch=initial_epoch,
        )

        print("\nTRAINING ENDED")
        print(d.now())

    print("\n", "/" * 120, "\n")
    print("PREDICTING ON TEST")
    export_to = os.path.join(activity_dir, "confs.test.h5")
    predict_on_inputs_provider(model, tst_ip, export_to, init, tran)

    print("\nDONE")
    print(d.now())
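
Example 2 differs from Example 1 mainly in the chunkwise normalization flags (mean_it, std_it), the prediction-only guard on steps_per_epoch, and in passing Viterbi priors (init, tran) to the prediction step, presumably so the framewise class posteriors can be smoothed before being exported to confs.test.h5. The project's predict_on_inputs_provider is not shown; the sketch below is only a generic log-space Viterbi decode over a matrix of per-frame class probabilities, with init as the initial-state prior and tran as the state-transition matrix, to illustrate how such priors are typically applied. The function and argument names (viterbi_decode, framewise_probs, eps) are illustrative, not the project's API.

import numpy as np

def viterbi_decode(framewise_probs, init, tran, eps=1e-12):
    """Generic log-space Viterbi decode.

    framewise_probs: (T, K) per-frame class probabilities,
    init: (K,) initial-state prior, tran: (K, K) transition matrix.
    Returns the most likely state sequence of length T.
    """
    log_obs = np.log(framewise_probs + eps)
    log_init = np.log(init + eps)
    log_tran = np.log(tran + eps)

    T, K = log_obs.shape
    delta = np.empty((T, K))                   # best path score ending in each state
    backptr = np.empty((T, K), dtype=np.int64)  # best previous state for each (t, state)

    delta[0] = log_init + log_obs[0]
    for t in range(1, T):
        scores = delta[t - 1][:, None] + log_tran       # [i, j] = score of prev i -> current j
        backptr[t] = np.argmax(scores, axis=0)
        delta[t] = scores[backptr[t], np.arange(K)] + log_obs[t]

    path = np.empty(T, dtype=np.int64)
    path[-1] = np.argmax(delta[-1])
    for t in range(T - 2, -1, -1):              # backtrack through the stored pointers
        path[t] = backptr[t + 1, path[t + 1]]
    return path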