Ejemplo n.º 1
0
def read_normalized_viterbi_priors():
    """Sum the raw viterbi priors stored in ``val_h5`` and ``trn_h5``
    and return them normalized via ``lu.normalize_raw_viterbi_priors``.
    """
    raw_init, raw_tran = 0, 0
    # accumulate the raw counts from both the validation and training files
    for h5_path in (val_h5, trn_h5):
        with hFile(h5_path, 'r') as f:
            raw_init = raw_init + f["viterbi/init"][()]
            raw_tran = raw_tran + f["viterbi/tran"][()]

    return lu.normalize_raw_viterbi_priors(raw_init, raw_tran)
Ejemplo n.º 2
0
    def _calconf_save(Ytrue, Ypred, path_postfix):
        """Compute the confusion matrix of Ytrue vs argmax-ed Ypred,
        export trues/preds/confs under ``path_postfix`` to ``export_to``,
        and return the confusion matrix.
        """
        # Ypred is assumed to be softmax output; argmax + to_categorical
        # turns it into one-hot "predictions". Ytrue assumed categorical.
        pred_onehot = to_categorical(
            Ypred.argmax(axis=-1),
            nclasses=Ytrue.shape[-1],
        )
        conf = confusion_matrix_forcategorical(Ytrue, pred_onehot)

        with hFile(export_to, 'a') as f:
            for prefix, payload in zip(('trues', 'preds', 'confs'),
                                       (Ytrue, Ypred, conf)):
                dest = "{}/{}".format(prefix, path_postfix)
                # never overwrite a dataset that already exists
                if dest not in f.keys():
                    f.create_dataset(dest,
                                     data=payload,
                                     compression='lzf',
                                     fletcher32=True)

            f.flush()

        return conf
Ejemplo n.º 3
0
def main():
    """Build the test inputs-provider, load and normalize the viterbi
    priors, print a model summary, and run prediction on the test set.
    """
    def _rule():
        # visual separator between stages of the run log
        print("\n", "/" * 120, "\n")

    _rule()
    print(d.now())
    print("\nOUTPUTS DIRECTORY:\n{}\n".format(activity_dir))

    print("\nTST H5:\n{}\n".format(tst_h5))
    tst_ip = ip.for_callids(
        tst_h5,
        callids=tst_callids,
        data_context=dctx,
        add_channel_at_end=add_channel_dim,
        label_subcontext=lctx,
        label_from_subcontext_fn=lctx_fn,
        steps_per_chunk=steps_per_chunk,
        classkeyfn=np.argmax,  # for categorical labels
        class_subsample_to_ratios=tst_class_subsampling,
        shuffle_seed=None,  # never shuffled
        npasses=1,
        mean_it=mean_it,
        std_it=std_it,
    )

    print(
        "{}: max-totlen: {:,}; nchunks: {:,}; steps_per_pass: {:,}; npasses: {:,}"
        .format("TST", tst_ip.totlen, tst_ip.nchunks, tst_ip.steps_per_pass,
                tst_ip.npasses))
    print("data shape: {}; label shape: {}".format(tst_ip.inputdatashape,
                                                   tst_ip.inputlabelshape))

    # read raw priors, then normalize each in the appropriate way
    with hFile(priors_fp, 'r') as pf:
        init, tran, priors = (np.array(pf[key])
                              for key in ('init', 'tran', 'priors'))

    init = init / init.sum()
    tran = nu.normalize_confusion_matrix(tran)[1]
    priors = priors / priors.sum()

    _rule()
    print("DONE SETTING PRIORS - NOW, MODEL SUMMARY")
    kmodel.summary()

    _rule()
    print("PREDICTING ON TEST")
    predict_on_inputs_provider(
        model=kmodel,
        inputs_provider=tst_ip,
        subsampling=sub,
        export_to_dir=os.path.join(activity_dir, activity_name),
        init=init,
        tran=tran,
        priors=priors,
    )

    print("\nDONE")
    print(d.now())
Ejemplo n.º 4
0
    def _save(paths, datas):
        """Write each (path, data) pair into ``export_to``, skipping any
        path that already exists in the file, then flush.
        """
        with hFile(export_to, 'a') as f:
            for dest, payload in zip(paths, datas):
                if dest in f.keys():
                    continue  # never overwrite an existing dataset
                f.create_dataset(dest,
                                 data=payload,
                                 compression='lzf',
                                 fletcher32=True)

            f.flush()
Ejemplo n.º 5
0
    def _maybe_export(self, datas, paths, multi=False):
        """Export ``datas`` at ``paths`` into ``self.export_to`` when it
        is set; do nothing otherwise.

        When ``multi`` is False, ``datas``/``paths`` are single items and
        get wrapped into one-element lists first.
        """
        if self.export_to is None:
            return

        if not multi:
            datas, paths = [datas], [paths]

        with hFile(self.export_to, 'a') as f:
            for dest, payload in zip(paths, datas):
                if dest in f.keys():
                    continue  # never overwrite an existing dataset
                f.create_dataset(dest,
                                 data=payload,
                                 compression='lzf',
                                 fletcher32=True)

            f.flush()
Ejemplo n.º 6
0
    def __init__(self, model_fp):
        """Configure the full pipeline: audio loading, feature extraction,
        normalization, data-context, batching, prediction, prediction
        merging, viterbi smoothing, and output annotation.

        model_fp: path to an HDF5 file holding both the Keras model and
            'rennet/model/...' datasets with viterbi priors and optional
            parameter overrides for the attributes set below.
        """
        # loading audio
        self.samplerate = 8000
        self.mono = True
        self.loadaudio = lambda fp: au.load_audio(
            filepath=fp,
            samplerate=self.samplerate,
            mono=self.mono,
        )

        # feature extraction: 32 ms windows with a 10 ms hop, 64 mel bands
        self.win_len = int(self.samplerate * 0.032)
        self.hop_len = int(self.samplerate * 0.010)
        self.window = 'hann'
        self.n_mels = 64
        self.exttractfeat = lambda y: au.logmelspectrogram(y=y,
                                                           sr=self.samplerate,
                                                           n_fft=self.win_len,
                                                           hop_len=self.
                                                           hop_len,
                                                           n_mels=self.n_mels,
                                                           window=self.window)

        # feature normalization: rolling mean(/std) over norm_winsec seconds,
        # expressed as a window length in hops
        self.std_it = False
        self.norm_winsec = 200
        self.norm_winlen = int((self.hop_len / self.samplerate * 1000) *
                               self.norm_winsec)  # seconds
        self.first_mean_var = 'copy'
        self.normalize = lambda feat: nu.normalize_mean_std_rolling(
            feat,
            win_len=self.norm_winlen,
            std_it=self.std_it,
            first_mean_var=self.first_mean_var)

        # adding data-context: strided view of data_context frames, step 1
        self.data_context = 21
        self.addcontext = lambda x: nu.strided_view(
            x, win_shape=self.data_context, step_shape=1)

        # input generator: yields (nsteps, generator of [x, x] batch pairs);
        # the trailing `repeat` covers the final partial batch.
        # NOTE(review): each batch is fed twice ([x, x]) — presumably a
        # two-input model; confirm against the loaded model's inputs.
        self.batchsize = 256
        self.get_inputsgenerator = lambda X: (
            len(X) // self.batchsize + int(len(X) % self.batchsize != 0
                                           ),  # nsteps
            ([x[..., None], x[..., None]] for x in chain(
                nu.strided_view(
                    X, win_shape=self.batchsize, step_shape=self.batchsize),
                repeat(X[-(len(X) - self.batchsize *
                           (len(X) // self.batchsize)):, ...]))))

        # predict
        self.model = load_model(model_fp)
        self.verbose = 0
        self.max_q_size = 4

        # merging preds: weighted average over the model's two outputs
        self.mergepreds_weights = np.array([[2, 2, 3], [0, 1, 1]])
        self.mergepreds_fn = lambda preds: mu.mergepreds_avg(
            preds, weights=self.mergepreds_weights)

        # viterbi smoothing: raw priors read from the model file, then
        # normalized into (init, tran)
        with hFile(model_fp, 'r') as f:
            self.rinit = f['rennet/model/viterbi/init'][()]
            self.rtran = f['rennet/model/viterbi/tran'][()]
        self.vinit, self.vtran = lu.normalize_raw_viterbi_priors(
            self.rinit, self.rtran)

        # output
        self.seq_minstart = (
            self.win_len // 2 +  # removed during feature extraction
            int((self.data_context // 2) *
                self.hop_len)  # rm during adding data-context
        ) / self.hop_len  # bringing to hop's samplerate. NOTE: yes, this can float
        self.seq_samplerate = self.samplerate // self.hop_len
        self.seq_keep = 'keys'
        # label index -> annotation tier name for the EAF output
        self.label_tiers = {
            0: "pred_none",
            1: "pred_single",
            2: "pred_multiple",
        }
        self.seq_annotinfo_fn = lambda label: lu.EafAnnotationInfo(
            tier_name=self.label_tiers[label])
        # cache of predictions keyed per input (filled elsewhere)
        self._cached_preds = dict()

        # get and set any params defined in the model_fp; only attributes
        # that already exist on self are overridden ('viterbi' is handled
        # separately above)
        with hFile(model_fp, 'r') as f:
            model_group = f['rennet/model']
            print()
            for att in model_group.keys():
                if att == 'viterbi':
                    continue
                elif att in self.__dict__:
                    val = model_group[att][()]
                    prev = getattr(self, att)
                    setattr(self, att, val)

                    # IDEA: move this to __setattr__ method to shout-out **all** changes.
                    #       It will shout even on __init__ then,
                    #       which will have to be handled appropriately.
                    print(
                        "{}.{} updated from model file, from {} to {}".format(
                            self.__class__.__name__, att, prev, val))

                # IDEA: Should we be pesky and raise errors when
                # there are unavailable `att` in the model file?
            print()