Exemple #1
0
def err_per_phone(flags_fname, scores_fname, threshold, text_fname, phn_map,
                  misp_trend):
    """Accumulate per-phone false-accept / false-reject rates over thresholds.

    Args:
        flags_fname: ark of int vectors; per-position flags (0 increments the
            correct-pronunciation counter, >0 the mispronunciation counter,
            >2 means "skip this position").
        scores_fname: ark of float vectors with per-position scores.
        threshold: 1-D array of decision thresholds to sweep.
        text_fname: ark of int vectors with per-position phone ids.
        phn_map: phone map; only len(phn_map) (number of phones) is used.
        misp_trend: key into the module-level ``misp_fn`` dispatch table that
            selects the score-vs-threshold comparison direction.

    Returns:
        (far, frr): two float arrays of shape (num_phones, num_thresholds);
        rows for phones with no counted examples are NaN.
    """
    flags_dict = {k: v for k, v in kio.read_vec_int_ark(flags_fname)}
    text_dict = {k: v for k, v in kio.read_vec_int_ark(text_fname)}

    far = np.zeros((len(phn_map), len(threshold)), dtype=np.int32)
    frr = np.zeros((len(phn_map), len(threshold)), dtype=np.int32)
    cnt_mp = np.zeros(len(phn_map), dtype=np.int32)
    cnt_cp = np.zeros(len(phn_map), dtype=np.int32)

    for utt_id, scores in kio.read_vec_flt_ark(scores_fname):
        flags = flags_dict[utt_id]
        phones = text_dict[utt_id]

        for i in range(len(flags)):
            if flags[i] > 2:
                # flagged to be excluded from scoring
                continue

            phn = phones[i]
            # boolean mask over thresholds: "decided mispronounced"
            mp = misp_fn[misp_trend](scores[i], threshold)

            if flags[i] == 0:
                cnt_cp[phn] += 1
                frr[phn, mp == True] = frr[phn, mp == True] + 1
            elif flags[i] > 0:
                cnt_mp[phn] += 1
                far[phn, mp == False] = far[phn, mp == False] + 1

    # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the documented equivalent (float64).
    cnt_mp = cnt_mp.astype(float)
    cnt_cp = cnt_cp.astype(float)
    cnt_mp[cnt_mp == 0.] = float('nan')
    cnt_cp[cnt_cp == 0.] = float('nan')
    cnt_mp = np.reshape(cnt_mp, (-1, 1))
    cnt_cp = np.reshape(cnt_cp, (-1, 1))
    return (far / cnt_mp), (frr / cnt_cp)
 def testInt32VectorReadWrite(self):
     """
     Test read/write for int32 vectors.
     """
     # load the same data from a binary and an ascii ark,
     i32_vec = dict(kaldi_io.read_vec_int_ark('tests/data/ali.ark'))
     i32_vec2 = dict(kaldi_io.read_vec_int_ark('tests/data/ali_ascii.ark'))
     # write the binary data back out,
     with kaldi_io.open_or_fd('tests/data_re-saved/ali.ark', 'wb') as f:
         for key, vec in i32_vec.items():
             kaldi_io.write_vec_int(f, vec, key)
     # re-read and check the round trip,
     for key, vec in kaldi_io.read_vec_int_ark('tests/data_re-saved/ali.ark'):
         self.assertTrue(np.array_equal(vec, i32_vec[key]),
                         msg="int32 vector same after re-saving")
Exemple #3
0
def get_vad_dict(ipath2vad_ark):
    """Read a Kaldi int-vector ark of VAD decisions into {utt_id: vector}.

    Raises AssertionError if the ark contains a duplicated utterance id.
    """
    result = {}
    for utt, vec in kaldi_io.read_vec_int_ark(ipath2vad_ark):
        assert utt not in result, "Duplicated utterance %s in %s" % (
            utt, ipath2vad_ark)
        result[utt] = vec
    return result
Exemple #4
0
def main():
    """Plot voiced-frame statistics for the VAD scp given as argv[1]."""
    assert len(sys.argv) == 2, "Improper number of arguments."

    ipath2vad_scp = sys.argv[1]
    # for debugging
    # ipath2vad_scp="ark:copy-vector scp:/home/jerry/research/ap17_olr/lsid/_vad/vad_dev_1s.1.scp ark,t:-|"

    # keyed by utterance id so specific utterances can be inspected later
    numvoicedframes = {}
    for uttid, vad_vec in kaldi_io.read_vec_int_ark(ipath2vad_scp):
        assert (uttid not in numvoicedframes
                ), "Duplicated utterance %s in %s" % (uttid, ipath2vad_scp)
        numvoicedframes[uttid] = (np.sum(vad_vec), vad_vec.shape[0])

    # histogram of voiced frames, then of all frames
    draw_histogram(numvoicedframes, 0,
                   "distribution of number of voiced frames per utterance")
    draw_histogram(numvoicedframes, 1,
                   "distrbution of number of frames per utterance")

    # voiced/whole frame ratio over the whole set
    print("The nvoiced/nwhole ratio is %.4f" % get_ratio(numvoicedframes))
Exemple #5
0
def load_dataset(fea_scp, fea_opts, lab_folder, lab_opts, left, right):
    """Load features and alignments, concatenated over utterances.

    Returns [snt_name, fea_conc, lab_conc, end_index] where end_index marks
    the end of each utterance inside the concatenated arrays; the first
    utterance is shortened by `left` frames and the last by `right`.
    """
    fea = {k: m for k, m in kaldi_io.read_mat_ark(
        'ark:copy-feats scp:' + fea_scp + ' ark:- |' + fea_opts)}
    # keep only the alignments of the loaded features
    lab = {k: v for k, v in kaldi_io.read_vec_int_ark(
        'gunzip -c ' + lab_folder + '/ali*.gz | ' + lab_opts + ' '
        + lab_folder + '/final.mdl ark:- ark:-|') if k in fea}
    # drop features without an alignment (see alidir log: "Did not Succeded")
    fea = {k: v for k, v in fea.items() if k in lab}

    snt_name = []
    end_index = []
    end_snt = 0
    first = True
    # shortest utterances first
    for key in sorted(fea.keys(), key=lambda k: len(fea[k])):
        if first:
            first = False
            fea_conc = fea[key]
            lab_conc = lab[key]
            end_snt = end_snt + fea[key].shape[0] - left
        else:
            fea_conc = np.concatenate([fea_conc, fea[key]], axis=0)
            lab_conc = np.concatenate([lab_conc, lab[key]], axis=0)
            end_snt = end_snt + fea[key].shape[0]
        end_index.append(end_snt)
        snt_name.append(key)

    end_index[-1] = end_index[-1] - right
    return [snt_name, fea_conc, lab_conc, end_index]
Exemple #6
0
def load_dataset(fea_scp, fea_opts, lab_folder, lab_opts, left, right):
    """Load features and alignments, concatenated over utterances.

    Returns [snt_name, fea_conc, lab_conc, end_index] where end_index marks
    the end of each utterance inside the concatenated arrays; the first
    utterance is shortened by `left` frames and the last by `right`.
    """
    fea = {k: m for k, m in kaldi_io.read_mat_ark(
        'ark:copy-feats scp:' + fea_scp + ' ark:- |' + fea_opts)}
    # keep only the alignments of the loaded features
    lab = {k: v for k, v in kaldi_io.read_vec_int_ark(
        'gunzip -c ' + lab_folder + '/ali*.gz | ' + lab_opts + ' '
        + lab_folder + '/final.mdl ark:- ark:-|') if k in fea}
    # drop features without an alignment (see alidir log: "Did not Succeded")
    fea = {k: v for k, v in fea.items() if k in lab}

    snt_name = []
    end_index = []
    end_snt = 0
    first = True
    # shortest utterances first
    for key in sorted(fea.keys(), key=lambda k: len(fea[k])):
        if first:
            first = False
            fea_conc = fea[key]
            lab_conc = lab[key]
            end_snt = end_snt + fea[key].shape[0] - left
        else:
            fea_conc = np.concatenate([fea_conc, fea[key]], axis=0)
            lab_conc = np.concatenate([lab_conc, lab[key]], axis=0)
            end_snt = end_snt + fea[key].shape[0]
        end_index.append(end_snt)
        snt_name.append(key)

    end_index[-1] = end_index[-1] - right
    return [snt_name, fea_conc, lab_conc, end_index]
Exemple #7
0
def run(config):
    """Compute per-utterance frame error rate of a feedforward nnet.

    Loads the model from config.model, the reference pdf/phone alignments
    from config.ali_dir, runs the features from config.scp through the net
    and returns {utt_id: frame error rate in percent}.
    """
    # Load the nnet model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    # Load alignment
    ali_files = [
        os.path.join(config.ali_dir, f) for f in listdir(config.ali_dir)
        if f.startswith('ali.')
    ]
    pdf_ali_dict = {}

    for file in ali_files:
        if config.ali_type == "pdf":
            pdf_ali_file = "ark:ali-to-pdf {} ark:'gunzip -c {} |' ark:- |".format(
                os.path.join(config.ali_dir, "final.mdl"), file)
        else:
            pdf_ali_file = "ark:ali-to-phones --per-frame {} ark:'gunzip -c {} |' ark:- |".format(
                os.path.join(config.ali_dir, "final.mdl"), file)
        # phone ids are shifted to 0-based with "- 1"
        pdf_ali_dict.update(
            {u: d - 1
             for u, d in kaldi_io.read_vec_int_ark(pdf_ali_file)})

    # Load feature stuff.
    # BUG FIX: feat_type/trans_path were only assigned inside the
    # conditional below, so an egs config without 'feat_type' crashed later
    # with NameError; default them explicitly.
    feat_type = None
    trans_path = None
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if config.override_trans_path is not None:
        trans_path = config.override_trans_path

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, config.scp)
    else:
        cmd = config.scp

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Get the posterior and score each utterance against its alignment
    fer_dict = {}
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        out = model(Variable(torch.FloatTensor(mat)))
        out = softmax(out[1].data.numpy())
        als = pdf_ali_dict[utt_id]
        preds = np.argmax(out, axis=1)
        err = (float(preds.shape[0]) - float(np.sum(np.equal(
            preds, als)))) * 100 / float(preds.shape[0])
        fer_dict[utt_id] = err

    return fer_dict
Exemple #8
0
def err(flags_fname, scores_fname, threshold, misp_trend):
    """Sweep thresholds and accumulate overall FAR/FRR over all utterances.

    Positions with flag > 2 are skipped; flag == 0 counts as a correct
    pronunciation, flag > 0 as a mispronunciation.
    """
    flags_dict = {k: v for k, v in kio.read_vec_int_ark(flags_fname)}

    far = np.zeros_like(threshold, dtype=np.int32)
    frr = np.zeros_like(threshold, dtype=np.int32)
    cnt_mp = 0
    cnt_cp = 0
    for uttid, scores in kio.read_vec_flt_ark(scores_fname):
        flags = flags_dict[uttid]

        for i, flag in enumerate(flags):
            if flag > 2:
                continue

            # boolean mask over thresholds: "decided mispronounced"
            mp = misp_fn[misp_trend](scores[i], threshold)

            if flag == 0:
                cnt_cp += 1
                frr[mp == True] += 1
            elif flag > 0:
                cnt_mp += 1
                far[mp == False] += 1
            else:
                print("Imposible?!")

    return (far / cnt_mp), (frr / cnt_cp)
Exemple #9
0
    def read_data(self):
        """Load (feats, lab) dicts for the current dataset split.

        Features come from data/<dset>/feats.scp with speaker CMVN (plus
        optional deltas and splicing); labels come from the matching
        alignment directory under exp/.
        """
        data_dir = os.path.join(self.recipe_dir, 'data', self.dset)
        feat_path = os.path.join(data_dir, 'feats.scp')
        # alignments: exp/<ali_dir> for train, exp/<ali_dir>_<dset> otherwise
        if self.dset == 'train':
            label_path = os.path.join(self.recipe_dir, 'exp', self.cfg['ali_dir'])
        else:
            label_path = os.path.join(self.recipe_dir, 'exp',
                                      self.cfg['ali_dir'] + '_' + self.dset)

        feat_opts = "apply-cmvn --utt2spk=ark:{0} ark:{1} ark:- ark:- |".format(
            os.path.join(data_dir, 'utt2spk'),
            os.path.join(data_dir, self.dset + '_cmvn_speaker.ark'))
        if self.cfg['feature_deltas']:
            feat_opts += " add-deltas --delta-order=2 ark:- ark:- |"
        if self.cfg['feature_context']:
            feat_opts += " splice-feats --left-context={0} --right-context={0} ark:- ark:- |".format(
                str(self.cfg['feature_context']))
        label_opts = ('ali-to-pdf' if self.cfg['task'] == 'classification'
                      else 'ali-to-phones --per-frame')

        feats = {k: m for k, m in kaldi_io.read_mat_ark(
            'ark:copy-feats scp:{} ark:- | {}'.format(feat_path, feat_opts))}
        lab = {k: v for k, v in kaldi_io.read_vec_int_ark(
            'gunzip -c {0}/ali*.gz | {1} {0}/final.mdl ark:- ark:-|'.format(
                label_path, label_opts))
               if k in feats}
        # keep only utterances that have both features and an alignment
        feats = {k: v for k, v in feats.items() if k in lab}

        return feats, lab
Exemple #10
0
 def read_to_vec(self, type='int'):
     """Read self.cmd as a Kaldi vector ark into {utt_id: np.array}.

     Args:
         type: 'int' or 'float' -- selects the int or float vector reader.

     Raises:
         ValueError: for any other ``type`` (previously this fell through
             to an obscure NameError on ``generator``).
     """
     print("run", self.cmd)
     if type == 'int':
         generator = kaldi_io.read_vec_int_ark(self.cmd)
     elif type == 'float':
         generator = kaldi_io.read_vec_flt_ark(self.cmd)
     else:
         raise ValueError("unsupported vector type: %r" % (type,))
     result = {utt_id: np.array(vec) for utt_id, vec in generator}
     return result
Exemple #11
0
def get_labels(ali_dir, ali_type, config):
    """Build {utt + ".pt": label tensor} from Kaldi alignments and save it.

    Args:
        ali_dir: comma-separated list of alignment directories.
        ali_type: "pdf" for pdf-id labels (already 0-based) or anything else
            for per-frame phone labels (1-based, shifted to 0-based).
        config: needs .notruncpad (bool), .max_seq_len (int, used for
            right-padding when notruncpad is false) and .save_dir.

    The dict is written to <save_dir>/labels.pkl via torch.save.
    """
    # Pair every alignment file with the directory it came from.
    # BUG FIX: the original reused the loop variable ``ali_dir`` after the
    # loop, so the LAST directory's final.mdl was applied to alignment
    # files from every directory.
    ali_files = []
    for cur_dir in ali_dir.split(','):
        ali_files.extend(
            (cur_dir, os.path.join(cur_dir, f))
            for f in listdir(cur_dir) if f.startswith('ali.'))

    pdf_ali_dict = {}

    for cur_dir, ali_file in ali_files:
        mdl = os.path.join(cur_dir, "final.mdl")
        if ali_type == "pdf":
            pdf_ali_file = "ark:ali-to-pdf {} ark:'gunzip -c {} |' ark:- |".format(
                mdl, ali_file)
            offset = 0  # pdf ids are already 0-based
        else:
            pdf_ali_file = "ark:ali-to-phones --per-frame {} ark:'gunzip -c {} |' ark:- |".format(
                mdl, ali_file)
            offset = 1  # phone ids are 1-based; shift to 0-based
        if config.notruncpad:
            pdf_ali_dict.update({
                u + ".pt": torch.FloatTensor(d - offset).long()
                for u, d in kaldi_io.read_vec_int_ark(pdf_ali_file)
            })
        else:
            # right-pad with zeros up to config.max_seq_len frames
            pdf_ali_dict.update({
                u + ".pt": F.pad(
                    torch.FloatTensor(d - offset).long(),
                    (0, config.max_seq_len - d.shape[0]))
                for u, d in kaldi_io.read_vec_int_ark(pdf_ali_file)
            })

    torch.save(pdf_ali_dict, os.path.join(config.save_dir, 'labels.pkl'))
    def __init__(self, input_stream, output_stream, flags_stream, phone_map_fname,
                 tmp_dir, batch_size):
        """Set up the loader: streamed features, in-memory targets and flags."""
        self.phone_map = read_phone_map(phone_map_fname)

        # features stay an iterator; targets and flags are fully materialized
        feats_iter = kio.read_mat_ark(input_stream)
        targets = dict(kio.read_ali_ark(output_stream))
        self.flags_dict = dict(kio.read_vec_int_ark(flags_stream))

        # flags of the current batch; filled in later
        self.flags = None

        gi.DataLoader.__init__(self, feats_iter, targets, tmp_dir, batch_size)
 def testInt32VectorReadWrite(self):
     """
     Test read/write for int32 vectors.
     """
     # load the same data from a binary and an ascii ark,
     i32_vec = dict(kaldi_io.read_vec_int_ark('tests/data/ali.ark'))
     i32_vec2 = dict(kaldi_io.read_vec_int_ark('tests/data/ali_ascii.ark'))
     # write the binary data back out,
     with kaldi_io.open_or_fd('tests/data_re-saved/ali.ark', 'wb') as f:
         for key, vec in i32_vec.items():
             kaldi_io.write_vec_int(f, vec, key)
     # re-read and check the round trip,
     for key, vec in kaldi_io.read_vec_int_ark('tests/data_re-saved/ali.ark'):
         self.assertTrue(np.array_equal(vec, i32_vec[key]),
                         msg="int32 vector same after re-saving")
def compute_prior(config):
    """Estimate log class priors from alignment frame counts.

    Reads every ali.* file in config.ali_dir (as pdf ids or 0-based phone
    ids depending on config.ali_type) and returns log(count / total) as a
    vector of length config.num_classes.
    """
    ali_files = [
        os.path.join(config.ali_dir, f) for f in listdir(config.ali_dir)
        if f.startswith('ali.')
    ]
    p = np.zeros(config.num_classes)
    mdl = os.path.join(config.ali_dir, "final.mdl")

    for file in ali_files:
        if config.ali_type == "pdf":
            pdf_ali_file = "ark:ali-to-pdf {} ark:'gunzip -c {} |' ark:- |".format(
                mdl, file)
            offset = 0  # pdf ids are already 0-based
        else:
            pdf_ali_file = "ark:ali-to-phones --per-frame {} ark:'gunzip -c {} |' ark:- |".format(
                mdl, file)
            offset = 1  # phone ids are 1-based; shift to 0-based
        for key, ali in kaldi_io.read_vec_int_ark(pdf_ali_file):
            # Count every class id in one pass with bincount instead of one
            # O(n) np.where scan per class; ids outside [0, num_classes)
            # are dropped, matching the original per-class equality tests.
            vals = ali - offset
            vals = vals[(vals >= 0) & (vals < config.num_classes)]
            p += np.bincount(vals, minlength=config.num_classes)

    return np.log(p / np.sum(p))
def read_kaldi_lab(kaldi_ali, kaldi_lab_opts):
    """Read labels in kaldi format.

    Uses kaldi IO.

    Arguments
    ---------
    kaldi_ali : str
        Path to directory where kaldi alignents are stored.
    kaldi_lab_opts : str
        A string that contains the options for reading the kaldi alignments.

    Returns
    -------
    lab : dict
        A dictionary contaning the labels.

    Note
    ----
    This depends on kaldi-io-for-python. Install it separately.
    See: https://github.com/vesis84/kaldi-io-for-python

    Example
    -------
    This example requires kaldi files.
    ```
    lab_folder = '/home/kaldi/egs/TIMIT/s5/exp/dnn4_pretrain-dbn_dnn_ali'
    read_kaldi_lab(lab_folder, 'ali-to-pdf')
    ```
    """
    # EXTRA TOOLS
    try:
        import kaldi_io
    except ImportError:
        raise ImportError("Could not import kaldi_io. Install it to use this.")
    # Build the read pipe (f-string yields the identical command string)
    # and materialize the labels.
    cmd = (f"gunzip -c {kaldi_ali}/ali*.gz | "
           f"{kaldi_lab_opts} {kaldi_ali}/final.mdl ark:- ark:-|")
    return dict(kaldi_io.read_vec_int_ark(cmd))
Exemple #16
0
def run(config, post_dict):
    """Return {utt_id: frame error rate (%)} of posteriors vs pdf alignments."""
    mdl = os.path.join(config.ali_dir, "final.mdl")
    ali_files = [os.path.join(config.ali_dir, f)
                 for f in listdir(config.ali_dir) if f.endswith('.gz')]

    fer_dict = {}
    for ali_file in ali_files:
        cmd = "ark:ali-to-pdf {} ark:'gunzip -c {} |' ark:- |".format(
            mdl, ali_file)
        for utt, als in kaldi_io.read_vec_int_ark(cmd):
            preds = np.argmax(post_dict[utt], axis=1)
            total = float(preds.shape[0])
            hits = float(np.sum(np.equal(preds, als)))
            fer_dict[utt] = (total - hits) * 100 / total

    return fer_dict
Exemple #17
0
    def __init__(self, phone_label=None, feats=None, transform=None):
        """
        Args:
            phone_label (dict): utt to frame label.
            feats (dict): utt to frame features.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        # Lazily build the class-level helpers on first construction.
        if Phone_cla_Dataset.class_trans_vector is None:
            Phone_cla_Dataset.class_trans_vector = np.vectorize(
                Phone_cla_Dataset.class_trans)
        if Phone_cla_Dataset.maxClassNum == -1:
            Phone_cla_Dataset.maxClassNum = max(
                list(Data_show.phone2class.values())) + 1

        if phone_label is None or feats is None:
            # fall back to the default on-disk data
            self.phone_label = {
                u: d
                for u, d in kaldi_io.read_vec_int_ark("feats/ali.1.ph")
            }
            self.feats = {
                u: d
                for u, d in kaldi_io.read_mat_scp("feats/feats.scp")
            }
        else:
            self.phone_label = phone_label
            self.feats = feats

        self.feats_list = []
        self.phone_label_list = []

        self.transform = transform

        # BUG FIX: iterate the attributes, not the raw arguments -- when the
        # defaults (None) were used, ``feats.items()`` raised AttributeError
        # even though the data had just been loaded above.
        for utt, feat in self.feats.items():
            if utt in self.phone_label:
                self.feats_list.append(feat)
                a = np.zeros(feat.shape[0], int)
                for i in range(a.shape[0]):
                    # labels appear to be at 1/3 of the feature frame rate
                    # -- TODO confirm against the alignment generation
                    a[i] = self.phone_label[utt][i // 3]
                self.phone_label_list.append(
                    Phone_cla_Dataset.class_trans_vector(a))

        self.feats_nd = np.concatenate(tuple(self.feats_list))
        self.phone_label_nd = np.concatenate(tuple(self.phone_label_list))
def get_phoneme_labels(ali_dir):
    """Read per-frame (1-based) phone alignments from one or more ali dirs.

    Args:
        ali_dir: comma-separated list of alignment directories, each holding
            ali.* files and its own final.mdl.

    Returns:
        dict mapping utterance id to the per-frame phone-id vector.
    """
    # Pair every alignment file with its own directory.
    # BUG FIX: the original reused the loop variable ``ali_dir`` after the
    # loop, so the LAST directory's final.mdl was applied to alignment
    # files from every directory.
    ali_files = []
    for cur_dir in ali_dir.split(','):
        ali_files.extend(
            (cur_dir, os.path.join(cur_dir, f))
            for f in os.listdir(cur_dir) if f.startswith('ali.'))

    pdf_ali_dict = {}

    for cur_dir, ali_file in ali_files:
        pdf_ali_file = "ark:ali-to-phones --per-frame {} ark:'gunzip -c {} |' ark:- |".format(
            os.path.join(cur_dir, "final.mdl"), ali_file)
        pdf_ali_dict.update(
            {u: d
             for u, d in kaldi_io.read_vec_int_ark(pdf_ali_file)})

    return pdf_ali_dict
def get_labels(ali_dir, ali_type):
    """Read 0-shifted ("- 1") labels from every ali.* file in *ali_dir*.

    ali_type == "pdf" selects pdf-id alignments, anything else selects
    per-frame phone alignments.
    """
    mdl = os.path.join(ali_dir, "final.mdl")
    ali_files = [
        os.path.join(ali_dir, f) for f in listdir(ali_dir)
        if f.startswith('ali.')
    ]

    labels = {}
    for ali_file in ali_files:
        if ali_type == "pdf":
            cmd = "ark:ali-to-pdf {} ark:'gunzip -c {} |' ark:- |".format(
                mdl, ali_file)
        else:
            cmd = "ark:ali-to-phones --per-frame {} ark:'gunzip -c {} |' ark:- |".format(
                mdl, ali_file)
        labels.update(
            {u: d - 1 for u, d in kaldi_io.read_vec_int_ark(cmd)})

    return labels
    def testPipeReadWrite(self):
        """
        Test read/write for pipes.

        Note: make sure the "os.environ['KALDI_ROOT']" in "kaldi_io/kaldi_io.py" is correct.
        """
        # the following line disables 'stderr' forwarding, comment it for DEBUG,
        # (the `with` rebinds the module-global sys.stderr to /dev/null for
        # the whole test body, discarding diagnostics from the Kaldi pipes)
        with open("/dev/null","w") as sys.stderr:
            # read,
            flt_mat4 = { k:m for k,m in kaldi_io.read_mat_ark('ark:copy-feats ark:tests/data/feats.ark ark:- |') }
            # write to pipe,
            with kaldi_io.open_or_fd('ark:| copy-feats ark:- ark:tests/data_re-saved/mat_pipe.ark','wb') as f:
                for k,m in flt_mat4.items(): kaldi_io.write_mat(f, m, k)
            # read it again and compare,
            for k,m in kaldi_io.read_mat_ark('tests/data_re-saved/mat_pipe.ark'):
                self.assertTrue(np.array_equal(m, flt_mat4[k]),"flt. matrix same after read/write via pipe")

            # read some other formats from pipe,
            # (results only checked for readability, not content)
            i32_vec3 = { k:v for k,v in kaldi_io.read_vec_int_ark('ark:copy-int-vector ark:tests/data/ali.ark ark:- |') }
            flt_vec4 = { k:v for k,v in kaldi_io.read_vec_flt_ark('ark:copy-vector ark:tests/data/conf.ark ark:- |') }
Exemple #21
0
    def read_custom_feats(self, custom_feats_ark):
        """Load (feats, lab) dicts from a user-supplied feature ark.

        Same label handling as read_data, but features come from
        *custom_feats_ark* and no CMVN is applied.
        """
        feat_path = custom_feats_ark
        # alignments: exp/<ali_dir> for train, exp/<ali_dir>_<dset> otherwise
        if self.dset == 'train':
            label_path = os.path.join(self.recipe_dir, 'exp', self.cfg['ali_dir'])
        else:
            label_path = os.path.join(self.recipe_dir, 'exp',
                                      self.cfg['ali_dir'] + '_' + self.dset)

        feat_opts = ''
        if self.cfg['feature_deltas']:
            feat_opts += " add-deltas --delta-order=2 ark:- ark:- |"
        if self.cfg['feature_context']:
            feat_opts += " splice-feats --left-context={0} --right-context={0} ark:- ark:- |".format(
                str(self.cfg['feature_context']))
        label_opts = ('ali-to-pdf' if self.cfg['task'] == 'classification'
                      else 'ali-to-phones --per-frame')

        feats = {k: m for k, m in kaldi_io.read_mat_ark(
            'ark:copy-feats ark:{} ark:- | {}'.format(feat_path, feat_opts))}
        lab = {k: v for k, v in kaldi_io.read_vec_int_ark(
            'gunzip -c {0}/ali*.gz | {1} {0}/final.mdl ark:- ark:-|'.format(
                label_path, label_opts))
               if k in feats}
        # keep only utterances that have both features and an alignment
        feats = {k: v for k, v in feats.items() if k in lab}

        return feats, lab
    def testPipeReadWrite(self):
        """
        Test read/write for pipes.

        Note: make sure the "os.environ['KALDI_ROOT']" in "kaldi_io/kaldi_io.py" is correct.
        """
        # rebinding sys.stderr silences forwarded stderr; comment out to DEBUG,
        with open("/dev/null", "w") as sys.stderr:
            # read matrices through a pipe,
            flt_mat4 = dict(kaldi_io.read_mat_ark(
                'ark:copy-feats ark:tests/data/feats.ark ark:- |'))
            # write them back through a pipe,
            with kaldi_io.open_or_fd(
                    'ark:| copy-feats ark:- ark:tests/data_re-saved/mat_pipe.ark',
                    'wb') as f:
                for key, mat in flt_mat4.items():
                    kaldi_io.write_mat(f, mat, key)
            # read again and compare,
            for key, mat in kaldi_io.read_mat_ark(
                    'tests/data_re-saved/mat_pipe.ark'):
                self.assertTrue(np.array_equal(mat, flt_mat4[key]),
                                "flt. matrix same after read/write via pipe")

            # read some other formats from pipe,
            i32_vec3 = dict(kaldi_io.read_vec_int_ark(
                'ark:copy-int-vector ark:tests/data/ali.ark ark:- |'))
            flt_vec4 = dict(kaldi_io.read_vec_flt_ark(
                'ark:copy-vector ark:tests/data/conf.ark ark:- |'))
Exemple #23
0
    def save_smoothed_feats(self):
        """Smooth the split's features and write them to smoothed.ark.

        Reads features/alignments exactly like read_data, applies
        smooth_acoustic per utterance and writes the result to
        data/<dset>/smoothed.ark.

        Returns:
            The path of the written ark file.
        """
        feat_path = os.path.join(self.recipe_dir, 'data', self.dset, 'feats.scp')
        if self.dset == 'train':
            label_path = os.path.join(self.recipe_dir, 'exp', self.cfg['ali_dir'])
        else:
            label_path = os.path.join(self.recipe_dir, 'exp', self.cfg['ali_dir'] + '_' + self.dset)
        feat_opts = "apply-cmvn --utt2spk=ark:{0} ark:{1} ark:- ark:- |". \
            format(os.path.join(self.recipe_dir, 'data', self.dset, 'utt2spk'),
                   os.path.join(self.recipe_dir, 'data', self.dset,
                                self.dset + '_cmvn_speaker.ark'))
        label_opts = 'ali-to-pdf' if self.cfg['task'] == 'classification' else 'ali-to-phones --per-frame'
        feats = {k: m for k, m in kaldi_io.read_mat_ark(
            'ark:copy-feats scp:{} ark:- | {}'.format(feat_path, feat_opts))}
        lab = {k: v for k, v in kaldi_io.read_vec_int_ark(
            'gunzip -c {0}/ali*.gz | {1} {0}/final.mdl ark:- ark:-|'.format(label_path, label_opts))
               if k in feats}
        # keep only utterances that have an alignment
        feats = {k: v for k, v in feats.items() if k in lab}

        fname = os.path.join(self.recipe_dir, 'data', self.dset, 'smoothed.ark')
        f = kaldi_io.open_or_fd(fname, 'wb')
        # BUG FIX: the output handle was never closed, leaking the fd and
        # risking an unflushed/truncated ark.
        try:
            for key in tqdm(feats):
                tmp = smooth_acoustic(feats[key])
                kaldi_io.write_mat(f, tmp, key)
        finally:
            f.close()

        return fname
Exemple #24
0
    root.ark_path = kwargs.pop('ark_path', None)


# These were lambda assignments (PEP 8 E731); plain functions keep the same
# zero-argument call interface and are easier to document and debug.
def cmd_gmm_info():
    """Shell command printing the pdf count of the model."""
    return '%s/src/gmmbin/gmm-info --print-args=false %s | grep pdfs' % (
        root.kaldi_dir, root.mdl_path)


def cmd_ali_pdf():
    """Shell command converting alignments to pdf ids (text output)."""
    return '%s/src/bin/ali-to-pdf --print-args=false %s "ark:gunzip -c %s|" ark,t:-' % (
        root.kaldi_dir, root.mdl_path, root.aligz_path)


def show_alignments():
    """Shell command pretty-printing the alignments."""
    return '%s/src/bin/show-alignments --print-args=false %s %s "ark:gunzip -c %s|"' % (
        root.kaldi_dir, root.phones_path, root.mdl_path, root.aligz_path)


def show_transitions():
    """Shell command listing the transition model."""
    return '%s/src/bin/show-transitions --print-args=false %s %s' % (
        root.kaldi_dir, root.phones_path, root.mdl_path)


def copy_int_vector():
    """Shell command dumping the raw alignment int-vectors as text."""
    return '%s/src/bin/copy-int-vector --print-args=false "ark:gunzip -c %s|" ark,t:-' % (
        root.kaldi_dir, root.aligz_path)


def alignment():
    """Read the alignment ark into {utt: int vector}."""
    # NOTE(review): this passes root.aligz_path (a gzipped archive?) straight
    # to the reader -- confirm kaldi_io can read it, or whether the
    # copy_int_vector() pipe was intended here.
    return {k: v for k, v in kaldi_io.read_vec_int_ark(root.aligz_path)}


def numpdfs():
    """Return the number of pdfs parsed from the gmm-info output."""
    with subprocess.Popen(cmd_gmm_info(), stdout=subprocess.PIPE,
                          shell=True) as proc:
        # the grepped line ends with the pdf count
        line = proc.stdout.readline().decode()
    return int(line.strip().split(' ')[-1])


def transid2info():
    transid2info = {}
    with subprocess.Popen(show_transitions(),
                          stdout=subprocess.PIPE,
Exemple #25
0
#!/usr/bin/env python

import numpy as np
import kaldi_io

# ---- int32 vector round trip -------------------------------------------
print('testing int32-vector i/o')
i32_vec = dict(kaldi_io.read_vec_int_ark('data/ali.ark'))          # binary,
i32_vec2 = dict(kaldi_io.read_vec_int_ark('data/ali_ascii.ark'))   # ascii,
# - store,
with kaldi_io.open_or_fd('data_re-saved/ali.ark', 'wb') as f:
    for key, vec in i32_vec.items():
        kaldi_io.write_vec_int(f, vec, key)
# - read and compare,
for key, vec in kaldi_io.read_vec_int_ark('data_re-saved/ali.ark'):
    assert np.array_equal(vec, i32_vec[key])

# ---- float vector i/o ---------------------------------------------------
print('testing float-vector i/o')
flt_vec = dict(kaldi_io.read_vec_flt_scp('data/conf.scp'))         # scp,
flt_vec2 = dict(kaldi_io.read_vec_flt_ark('data/conf.ark'))        # binary-ark,
flt_vec3 = dict(kaldi_io.read_vec_flt_ark('data/conf_ascii.ark'))  # ascii-ark,
# - store,
with kaldi_io.open_or_fd('data_re-saved/conf.ark', 'wb') as f:
    for key, vec in flt_vec.items():
        kaldi_io.write_vec_flt(f, vec, key)
Exemple #26
0
def text_generator(path, name):
    """Yield name-prefixed entries from the int-vector ark at *path*."""
    return prepend_generator(kaldi_io.read_vec_int_ark(path), name)
parser.add_argument('eval_dir')
args = parser.parse_args()

# Paths derived from the experiment / evaluation directories
idx_to_phn_name_file = os.path.join(args.exp_dir, 'phn_sil_to_idx.txt')
phn_to_idx_file = os.path.join(args.exp_dir, 'phn_sil_to_idx.int')
pfeats_name_to_idx_file = os.path.join(args.exp_dir, 'pfeats_name_to_idx.txt')
out_file = os.path.join(args.eval_dir, 'accuracy.txt')
output = open(out_file, 'w')  # NOTE(review): closed later? confirm

# Mappings
idx_to_pfeats_name = read_inv_phone_map(pfeats_name_to_idx_file)
idx_to_phn_name = read_inv_phone_map(idx_to_phn_name_file)
pfeats_map = PFeatsMap(phn_to_idx_file, args.lang)

# Inputs
aligns_it = kio.read_vec_int_ark(args.align)
pfeats_it = kio.read_mat_ark(args.pfeats)

# Counters.
# BUG FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24; the
# builtin int yields the same default integer dtype.
phn_cnt = np.zeros(pfeats_map.phn_dim(), dtype=int)
phn_correct = np.zeros_like(phn_cnt)
pfeats_correct = np.zeros(pfeats_map.pfeats_dim(), dtype=int)

# Evaluate accuracy
for utt_phones, utt_pfeats_real in zip(aligns_it, pfeats_it):
    for phone, pfeats_real in zip(utt_phones[1], utt_pfeats_real[1]):
        pfeats_real = np.exp(pfeats_real)
        if not pfeats_map.is_phn_valid(phone):
            continue

        pfeats_true = pfeats_map.phn_to_pfeats(phone)
Exemple #28
0
    Id2LoglikeAMModel.add_args(parser)
    parser.add_argument('ali_dir')
    parser.add_argument('--max_pdf_id',
                        type=int,
                        default=None,
                        help="Maximum pdf_id")
    args = parser.parse_args()

    ali_stretch_model = AliStretchModel.build(args, load_from_cashe=False)
    id2ll_model = Id2LoglikeAMModel.build(args, load_from_cashe=False)

    logger.info(f"Loading {args.ali_dir}/ali.*.gz")
    utt2ali = {
        key: ali
        for key, ali in tqdm(
            kaldi_io.read_vec_int_ark(
                f'ark: gunzip -c {args.ali_dir}/ali_pdf.1.gz|'))
    }
    i = 0
    for key, ali in tqdm(utt2ali.items()):
        i += 1
        ali_stretch_model.add_utts(ali)
    logger.info(f"AliStretchModel processed {i} utterances")
    ali_stretch_model.compute()
    ali_stretch_model.save_to_file()

    logger.info(f"Loaded {len(utt2ali)} alis")
    logger.info(f"Loading logprobs and train model")
    i = 0
    for k, m in tqdm(
            kaldi_io.read_mat_ark(f'ark: cat {args.ali_dir}/output.1.ark |'),
            total=len(utt2ali)):
            maxscore = float("-inf")
            targetscore = 0

# pdb.set_trace()

# if 'wrong' == wrong_or_correct:
#   for uttid in wrong_uttids:
#     print(uttid)
# else:
#   for uttid in correct_uttids:
#     print(uttid)

# Per-utterance (voiced frames, total frames), split by recognition outcome.
numvoicedframes_correct = {}
numvoicedframes_wrong = {}

for uttid, vad_vec in kaldi_io.read_vec_int_ark(ipath2vad_scp):
    assert (uttid not in numvoicedframes_correct
            ), "Duplicated utterance %s in %s" % (uttid, ipath2vad_scp)
    assert (uttid not in numvoicedframes_wrong
            ), "Duplicated utterance %s in %s" % (uttid, ipath2vad_scp)
    stats = (np.sum(vad_vec), vad_vec.shape[0])
    if uttid in correct_uttids:
        numvoicedframes_correct[uttid] = stats
    if uttid in wrong_uttids:
        numvoicedframes_wrong[uttid] = stats

draw_histogram(
    numvoicedframes_correct, 0,
    "distribution of number of voiced frames per utterance(correct utterance)")
print("The nvoiced/nwhole ratio is %.4f" % get_ratio(numvoicedframes_correct))

draw_histogram(
def _load_numvoicedframes(ipath2vad_scp):
    """Read a Kaldi VAD archive and return per-utterance frame counts.

    Args:
        ipath2vad_scp: Kaldi read specifier (an ``ark:... |`` pipe) yielding
            one int VAD vector per utterance.

    Returns:
        Dict mapping utterance id to ``(num_voiced_frames, num_total_frames)``.

    Raises:
        AssertionError: if an utterance id occurs more than once.
    """
    numvoicedframes = dict()
    for uttid, vad_vec in kaldi_io.read_vec_int_ark(ipath2vad_scp):
        assert (
            uttid not in numvoicedframes
        ), "Duplicated utterance %s in %s" % (uttid, ipath2vad_scp)
        # VAD vector is 1 for voiced frames, so the sum counts voiced frames.
        numvoicedframes[uttid] = (np.sum(vad_vec), vad_vec.shape[0])
    return numvoicedframes


def main():
    """Overlay frame-count histograms of the train and dev_1s sets.

    Loads the VAD vectors of both data sets, collects per-utterance
    (voiced, total) frame counts, shows the two voiced-frame-count
    distributions in one histogram, and prints the voiced/total ratio.
    """
    ipath2train_vad_scp = "ark:copy-vector scp:/home/jerry/research/ap17_olr/lsid/data/train/vad.scp ark,t:- |"
    ipath2dev1s_vad_scp = "ark:copy-vector scp:/home/jerry/research/ap17_olr/lsid/data/dev_1s/vad.scp ark,t:- |"

    # Dicts are keyed by utterance id so specific utterances can be
    # inspected later if needed.
    train_numvoicedframes = _load_numvoicedframes(ipath2train_vad_scp)
    dev1s_numvoicedframes = _load_numvoicedframes(ipath2dev1s_vad_scp)

    # idx selects which statistic to plot: 0 = voiced frames, 1 = all frames.
    idx = 0
    train_nframe_list = [
        stats[idx] for stats in train_numvoicedframes.values()
    ]
    dev1s_nframe_list = [
        stats[idx] for stats in dev1s_numvoicedframes.values()
    ]
    # Both sets share the same 1-frame-wide bins over [0, 1000).
    num_bins = range(0, 1000)

    title = "distribution of number of frames per utterance"
    plt.hist(train_nframe_list, num_bins, facecolor='blue', alpha=0.5)
    plt.hist(dev1s_nframe_list, num_bins, facecolor='red', alpha=0.5)
    plt.xlabel("number of frames per utterance")
    plt.ylabel("number of utterance")
    plt.title(title)
    plt.show()

    # BUG FIX: the original referenced the undefined name `numvoicedframes`
    # here, which raised NameError when reached. Report the dev_1s set's
    # ratio instead; TODO confirm whether the train set was intended.
    print("The nvoiced/nwhole ratio is %.4f" % get_ratio(dev1s_numvoicedframes))
# Script arguments: experiment dir (trained model) and evaluation output dir.
parser.add_argument('exp_dir')
parser.add_argument('eval_dir')
args = parser.parse_args()

os.makedirs(args.eval_dir, exist_ok=True)
# Training artifacts read from the experiment directory.
model_fname = os.path.join(args.exp_dir, 'final.h5')
phn_map_fname = os.path.join(args.exp_dir, 'phn_sil_to_idx.int')
# Outputs: binary Kaldi score archive plus a human-readable text copy.
score_fname = os.path.join(args.eval_dir, 'score.ark')
score_txt_fname = os.path.join(args.eval_dir, 'score.txt')
# NOTE(review): this handle is never closed in the visible code — confirm it
# is closed (or flushed at process exit) after all scores are written.
score_txt = open(score_txt_fname, 'w')

# Load phone map
phn_map = read_phone_map(phn_map_fname)

# Load kaldi files (flags and alignments fully into dicts; features streamed
# lazily one utterance at a time).
flags_dict = {k: v for k, v in kio.read_vec_int_ark(args.flags)}
ali_dict = {k: v for k, v in kio.read_ali_ark(args.phones)}
feats_it = kio.read_mat_ark(args.feats)

# Load classifier model
model = MispModel.load(model_fname)

with open(score_fname, 'wb') as f:
    for utt, feats in feats_it:
        # Workaround to deal with missing alignments: skip utterances that
        # have features but no alignment.
        if utt not in ali_dict:
            continue

        # Emit the utterance id first; scores presumably follow on the same
        # line further below — the rest of this loop is outside this view.
        print(utt, end=' ', file=score_txt)
        flags = flags_dict[utt]
        ali = ali_dict[utt]
args = parser.parse_args()

# Prepare files
os.makedirs(args.res_dir, exist_ok=True)
# Features arrive as a binary Kaldi stream on stdin.
feats_fd = sys.stdin.buffer
flags_fname = os.path.join(args.data_dir, 'text_ext_flags')
ali_force_frame_fname = os.path.join(args.cbps_dir,
                                     'force_ali_test/ali_frames.gz')
ali_force_fname = os.path.join(args.cbps_dir, 'force_ali_test/ali_pdf.gz')
score_fname = os.path.join(args.res_dir, 'score.ark')
score_txt_fname = os.path.join(args.res_dir, 'score.txt')
# NOTE(review): these two handles are never closed in the visible code.
score_txt = open(score_txt_fname, 'w')
cmp_fd = open('test/cmp_gop.txt', 'w')

# Load kaldi files (all four are lazy iterators over (utt_id, data) pairs).
flags_it = kio.read_vec_int_ark(flags_fname)
ali_force_it = kio.read_ali_ark(ali_force_fname)
ali_force_frm_it = kio.read_ali_ark(ali_force_frame_fname)
feats_it = kio.read_mat_ark(feats_fd)

with open(score_fname, 'wb') as f:
    # Iterate the four archives in lockstep; assumes they list the same
    # utterances in the same order — TODO confirm upstream sorting.
    for flags_t, ali_force_t, ali_force_frm_t, feats_t in zip(
            flags_it, ali_force_it, ali_force_frm_it, feats_it):
        # Unpack each tuple (only the first archive's utt id is kept).
        utt, flags = flags_t
        _, ali_force = ali_force_t
        _, ali_force_frm = ali_force_frm_t
        _, feats = feats_t

        # Get only features for corresponding states in alignments
        probs_force = hlp.np_pick(feats, ali_force)
Exemple #33
0
def load_dataset(fea_scp,
                 fea_opts,
                 lab_folder,
                 lab_opts,
                 left,
                 right,
                 max_sequence_length,
                 fea_only=False):
    """Load Kaldi features (and optionally labels) into flat concatenated arrays.

    Args:
        fea_scp: Kaldi .scp file listing the feature matrices.
        fea_opts: shell pipeline appended after the copy-feats command
            (e.g. CMVN / delta options).
        lab_folder: directory containing ali*.gz alignments and final.mdl.
        lab_opts: alignment conversion command (e.g. ali-to-pdf).
        left, right: context sizes; unused here but kept for interface
            compatibility with callers.
        max_sequence_length: if > 0, sentences longer than this are split
            into chunks; a final chunk may be up to 25% longer than the
            limit so that tiny remainders are avoided.
        fea_only: if True, labels are not loaded and zero vectors are used
            in their place.

    Returns:
        ``[snt_name, fea_conc, lab_conc, end_index]`` — sentence (chunk)
        names, frame-wise concatenated features and labels sorted by
        sentence length, and the cumulative sentence-end frame indices.
    """
    # Read every feature matrix through a copy-feats pipe.
    fea = {
        k: m
        for k, m in kaldi_io.read_mat_ark('ark:copy-feats scp:' + fea_scp +
                                          ' ark:- |' + fea_opts)
    }

    if not fea_only:
        # Keep only the alignments of the loaded features, then drop the
        # features without an alignment (alignment may have failed for some
        # utterances — see "Did not Succeded" in the alidir log).
        lab = {
            k: v
            for k, v in kaldi_io.read_vec_int_ark('gunzip -c ' + lab_folder +
                                                  '/ali*.gz | ' + lab_opts +
                                                  ' ' + lab_folder +
                                                  '/final.mdl ark:- ark:-|')
            if k in fea
        }
        fea = {
            k: v
            for k, v in fea.items() if k in lab
        }

    end_snt = 0
    end_index = []
    snt_name = []
    fea_conc = []
    lab_conc = []

    # Process sentences shortest-first; the inner sorted() makes the order
    # deterministic among sentences of equal length (stable sort on key).
    for k in sorted(sorted(fea.keys()), key=lambda k: len(fea[k])):

        #####
        # If the sequence length is above the threshold, we split it with a
        # minimal length max/4.
        # If max length = 500, then the split will start at 500 + (500/4) = 625.
        # A seq of length 625 will be splitted in one of 500 and one of 125.

        if (len(fea[k]) > max_sequence_length) and max_sequence_length > 0:

            fea_chunked = []
            lab_chunked = []

            # Ceil-divide to get an upper bound on the number of chunks.
            for i in range((len(fea[k]) + max_sequence_length - 1) //
                           max_sequence_length):
                if (len(fea[k][i * max_sequence_length:]) >
                        max_sequence_length + (max_sequence_length / 4)):
                    # Remainder is still comfortably long: cut a full chunk.
                    fea_chunked.append(fea[k][i * max_sequence_length:(i + 1) *
                                              max_sequence_length])
                    if not fea_only:
                        lab_chunked.append(
                            lab[k][i * max_sequence_length:(i + 1) *
                                   max_sequence_length])
                    else:
                        lab_chunked.append(
                            np.zeros((fea[k][i * max_sequence_length:(i + 1) *
                                             max_sequence_length].shape[0], )))
                else:
                    # Take the whole remainder (at most 1.25 * max) and stop.
                    fea_chunked.append(fea[k][i * max_sequence_length:])
                    if not fea_only:
                        lab_chunked.append(lab[k][i * max_sequence_length:])
                    else:
                        lab_chunked.append(
                            np.zeros(
                                (fea[k][i * max_sequence_length:].shape[0], )))
                    break

            for j in range(0, len(fea_chunked)):
                fea_conc.append(fea_chunked[j])
                lab_conc.append(lab_chunked[j])
                snt_name.append(k + '_split' + str(j))

        else:
            # Sentence fits in one piece.
            fea_conc.append(fea[k])
            if not fea_only:
                lab_conc.append(lab[k])
            else:
                lab_conc.append(np.zeros((fea[k].shape[0], )))
            snt_name.append(k)

    # Re-sort the (possibly split) sentences by chunk length.
    fea_zipped = zip(fea_conc, lab_conc)
    fea_sorted = sorted(fea_zipped, key=lambda x: x[0].shape[0])
    fea_conc, lab_conc = zip(*fea_sorted)

    # Cumulative frame index marking where each sentence ends.
    for entry in fea_conc:
        end_snt = end_snt + entry.shape[0]
        end_index.append(end_snt)

    fea_conc = np.concatenate(fea_conc)
    lab_conc = np.concatenate(lab_conc)

    return [snt_name, fea_conc, lab_conc, np.asarray(end_index)]