Example #1
    def _create_and_save_stats(self, mat, output):
        # tmp_mean = np.mean(mat, axis=0)
        # tmp_std = np.std(mat, axis=0)

        # pool per-file statistics (each row: [num_frames, mean(dim), var(dim)])
        # into a global mean and std
        num_samples = np.sum(mat[:, 0])
        dim = int((mat.shape[1] - 1) / 2)

        # global mean
        tmp_mean = np.sum((np.expand_dims(mat[:, 0], 1) * mat[:, 1:(dim + 1)]),
                          axis=0) / num_samples

        # global variance via the law of total variance, then std
        tmp_std = np.sum(
            np.expand_dims(mat[:, 0], 1) *
            (mat[:, (dim + 1):] + np.square(mat[:, 1:(dim + 1)] - tmp_mean)),
            axis=0) / num_samples
        tmp_std = np.sqrt(tmp_std)

        # saving to stats matrix
        stats_dict = {
            'mean': np.expand_dims(tmp_mean, 1),
            'std': np.expand_dims(tmp_std, 1)
        }

        # write stats.mat next to the output file
        p = Path(output)

        with open(p.parent / 'stats.mat', 'wb') as f:
            for key, stat in stats_dict.items():
                kaldi_io.write_mat(f,
                                   stat.astype(np.float32, copy=False),
                                   key=key)
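The pooling above is the standard law-of-total-variance combination of per-file statistics. A minimal self-contained sketch (synthetic data, plain NumPy, no kaldi_io) that checks the same formulas against a direct computation over the concatenated data:

import numpy as np

# two synthetic "files" of 39-dim features
a = np.random.randn(100, 39)
b = np.random.randn(250, 39) + 1.0

# one stats row per file: [num_frames, mean(dim), var(dim)]
stats = np.stack(
    [np.concatenate([[x.shape[0]], x.mean(0), x.var(0)]) for x in (a, b)])

dim = (stats.shape[1] - 1) // 2
n = stats[:, :1]               # per-file frame counts
mu = stats[:, 1:dim + 1]       # per-file means
var = stats[:, dim + 1:]       # per-file variances

g_mean = (n * mu).sum(0) / n.sum()
g_std = np.sqrt((n * (var + (mu - g_mean) ** 2)).sum(0) / n.sum())

full = np.concatenate([a, b])
assert np.allclose(g_mean, full.mean(0)) and np.allclose(g_std, full.std(0))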
Example #2
 def _save_weights(self):
     weights_dict = {'weights': self._weights}
     with open('weights_tmp.mat', 'wb') as f:
         for key, mat in list(weights_dict.items()):
             kaldi_io.write_mat(f,
                                mat.astype(np.float32, copy=False),
                                key=key)
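For reference, a minimal write/read round trip with the same library, assuming the usual kaldi_io API in which read_mat_ark yields (key, matrix) pairs:

import numpy as np
import kaldi_io

with open('weights_tmp.mat', 'wb') as f:
    kaldi_io.write_mat(f, np.eye(3, dtype=np.float32), key='weights')

for key, mat in kaldi_io.read_mat_ark('weights_tmp.mat'):
    print(key, mat.shape)  # -> weights (3, 3)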
Example #3
    def create_dataset(self, nj, frac, path_data, output_folder):

        dataset = DataIterator(nj, path_data)

        data = []
        misc = Misc()
        count_size = 0
        while True:
            try:
                data_path = dataset.next_file()
                print(data_path)
                for key, mat in kaldi_io.read_mat_ark(data_path):
                    df_mat = pd.DataFrame(mat)
                    np_mat = df_mat.sample(frac=frac).values
                    # np_mat[:, 39] = misc.trans_vec_to_phones(np_mat[:, 39])
                    data.append(np_mat)

            except StopIteration:
                data_sample = np.concatenate(data)
                print(data_sample.shape)
                data_dict = {'data': data_sample}

                with open(output_folder + '/dataset.mat', 'wb') as f:
                    for key, mat in data_dict.items():
                        kaldi_io.write_mat(f,
                                           mat.astype(np.float32, copy=False),
                                           key=key)

                break
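The pandas detour above exists only for the frac-based subsampling. A sketch of the same per-utterance sampling in plain NumPy; sample_rows and the fixed seed are illustrative, not part of the original code:

import numpy as np

def sample_rows(mat, frac, seed=0):
    # draw round(frac * rows) rows without replacement,
    # like DataFrame.sample(frac=frac)
    rng = np.random.default_rng(seed)
    n = int(round(frac * mat.shape[0]))
    return mat[rng.choice(mat.shape[0], size=n, replace=False)]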
Example #4
    def save_codebook(self, path):
        if not self._kaldi_formatting:
            raise TypeError('codebook has to be in kaldi format to be saved')
        # prepare codebook for saving
        path_new = path.split('.')
        assert len(self._dict_codebook) > 0

        if len(self._dict_codebook) > 1:
            # prepare codebook for multiple vqs
            self.codebook = np.zeros([self._num_cluster,
                                      39])  # 39 is the feature dimension
            keys = ['energy', 'raw', 'delta', 'dd']
            dict_indicies = {
                'energy': [0, 13, 26],
                'raw': range(1, 13, 1),
                'delta': range(14, 26, 1),
                'dd': range(27, 39, 1)
            }
            for key in keys:
                self.codebook[:, dict_indicies[key]] = self._dict_codebook[
                    key].cluster_centers_
            path = path_new[0] + '_multiple.' + path_new[1]
        else:
            self.codebook = self._dict_codebook['simple'].cluster_centers_
            path = path_new[0] + '_single.' + path_new[1]

        with open(path, 'wb') as f:
            # print(self.codebook)
            kaldi_io.write_mat(f, self.codebook, key='cb')
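The four index sets are meant to partition the 39 feature dimensions (the energy terms plus the raw, delta, and delta-delta cepstra). A quick sketch that verifies the partition is disjoint and complete:

dict_indicies = {
    'energy': [0, 13, 26],
    'raw': range(1, 13),
    'delta': range(14, 26),
    'dd': range(27, 39),
}
all_idx = sorted(i for idx in dict_indicies.values() for i in idx)
assert all_idx == list(range(39))  # disjoint and covers every dimension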
Example #5
def compute_wav_path(wav, feat_scp, feat_ark, utt2dur, utt2num_frames):
    feat, duration = Make_Spect(wav_path=wav[1], windowsize=0.02, stride=0.01, duration=True)
    # np_fbank = Make_Fbank(filename=uid2path[uid], use_energy=True, nfilt=c.TDNN_FBANK_FILTER)

    len_vec = len(feat.tobytes())
    key = wav[0]
    kaldi_io.write_mat(feat_ark, feat, key=key)

    # the scp entry points at the '\0B' marker: 15 bytes of binary header
    # (2-byte '\0B', 3-byte 'FM ', two size-prefixed int32 dims) plus the
    # raw float data precede the current file position
    feat_scp.write(str(key) + ' ' + str(feat_ark.name) + ':' + str(feat_ark.tell() - len_vec - 15) + '\n')
    utt2dur.write('%s %.6f\n' % (str(key), duration))
    utt2num_frames.write('%s %d\n' % (str(key), len(feat)))
Example #6
def MakeFeatsProcess(out_dir, proid, t_queue, e_queue):
    #  wav_scp = os.path.join(data_path, 'wav.scp')
    feat_scp = os.path.join(out_dir, 'feat.%d.scp' % proid)
    feat_ark = os.path.join(out_dir, 'feat.%d.ark' % proid)
    utt2dur = os.path.join(out_dir, 'utt2dur.%d' % proid)
    utt2num_frames = os.path.join(out_dir, 'utt2num_frames.%d' % proid)

    feat_scp = open(feat_scp, 'w')
    feat_ark = open(feat_ark, 'wb')
    utt2dur = open(utt2dur, 'w')
    utt2num_frames = open(utt2num_frames, 'w')

    while not t_queue.empty():
        wav = t_queue.get()

        pair = wav.split()
        try:
            feat, duration = Make_Spect(wav_path=pair[1],
                                        windowsize=0.02,
                                        stride=0.01,
                                        duration=True)
            # np_fbank = Make_Fbank(filename=uid2path[uid], use_energy=True, nfilt=c.TDNN_FBANK_FILTER)

            len_vec = len(feat.tobytes())
            key = pair[0]
            kaldi_io.write_mat(feat_ark, feat, key=key)

            feat_scp.write(
                str(key) + ' ' + str(feat_ark.name) + ':' +
                str(feat_ark.tell() - len_vec - 15) + '\n')
            utt2dur.write('%s %.6f\n' % (str(key), duration))
            utt2num_frames.write('%s %d\n' % (str(key), len(feat)))

        except Exception:
            print("Error: %s" % pair[0])
            e_queue.put(pair[0])

        # if self.queue.qsize() % 1000 == 0:
        print('==> Process %s: %s left' % (str(proid), str(t_queue.qsize())))

    feat_scp.close()
    feat_ark.close()
    utt2dur.close()
    utt2num_frames.close()

    print('>> Process {} finished!'.format(proid))
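The feat_ark.tell() - len_vec - 15 arithmetic in both functions hard-codes the 15-byte binary header. A sketch of a less fragile variant, assuming (as the kaldi_io used here does) that write_mat prefixes the record with '<key> ' when a key is given; write_record is an illustrative helper:

import kaldi_io

def write_record(feat_scp, feat_ark, key, feat):
    # remember where the record starts; the scp pointer has to land just
    # past '<key> ', i.e. on the '\0B' binary marker
    pos = feat_ark.tell()
    kaldi_io.write_mat(feat_ark, feat, key=key)
    feat_scp.write('%s %s:%d\n' % (key, feat_ark.name, pos + len(key) + 1))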
Example #7
    def create_p_s_m(self):

        self._feeder.init_train()

        # set model.train to False to avoid training
        # model.train = False
        while True:
            try:
                feat, labs = self._session.run(
                    [self._input_train[0], self._input_train[1]])

                nom_vq, den_vq = self._session.run(
                    self._train_dict['data_vq'],
                    feed_dict={
                        self._placeholders['ph_train']: False,
                        self._placeholders['ph_features']: feat,
                        self._placeholders['ph_labels']: labs
                    })

            except tf.errors.OutOfRangeError:
                # additively smooth the counts accumulated over the last pass
                # so that no conditional probability is exactly zero
                nom_vq += Settings.delta
                den_vq += Settings.num_labels * Settings.delta
                prob = nom_vq / den_vq

                # saving matrix with kaldi_io
                save_dict = {'p_s_m': prob}
                print('Saving P(s_k|m_j)')
                with open('p_s_m.mat', 'wb') as f:
                    for key, mat in list(save_dict.items()):
                        kaldi_io.write_mat(f, mat, key=key)

                # reset den and nom, set variable
                self._session.run([
                    self._misc.reset_variable(self._variables['nominator']),
                    self._misc.reset_variable(self._variables['denominator']),
                    tf.assign(self._variables['conditioned_probability'], prob)
                ])

                break
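The except branch applies additive smoothing to the accumulated counts before normalizing, so no conditional probability ends up exactly zero. The same step in isolation, with toy counts standing in for nom_vq/den_vq and num_labels matching the toy shape:

import numpy as np

delta = 0.01                            # stands in for Settings.delta
nom = np.array([[5., 0.], [2., 3.]])    # toy accumulated counts
den = nom.sum(axis=1, keepdims=True)
num_labels = nom.shape[1]               # stands in for Settings.num_labels

prob = (nom + delta) / (den + num_labels * delta)
assert (prob > 0).all() and np.allclose(prob.sum(axis=1), 1.0)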
Example #8
    def merge_data_phonemes(self, nj, path_data, path_phonemes, output_folder):
        assert isinstance(path_data, str) and isinstance(path_phonemes, str)

        # create Iterators
        dataset = DataIterator(self._nj,
                               path_data,
                               splice=self._splice,
                               cmvn=self._cmvn)
        phonemes = AlignmentIterator(nj, path_phonemes)

        # iterate through data
        count = 1
        tmp_dict = {}
        while True:
            try:
                for (key_data, mat_data), (key_pho, mat_pho) in zip(
                        kaldi_io.read_mat_ark(dataset.next_file()),
                        kaldi_io.read_ali_ark(phonemes.next_file())):
                    # zip() assumes both archives enumerate the utterances in
                    # the same order, so check that the keys really match
                    if key_data == key_pho:
                        print(key_data)
                        tmp_dict[key_data] = pd.concat(
                            [pd.DataFrame(mat_data),
                             pd.DataFrame(mat_pho)],
                            axis=1)

                with open(output_folder + '/feats_vq_' + str(count),
                          'wb') as f:
                    for key, mat in list(tmp_dict.items()):
                        kaldi_io.write_mat(f,
                                           mat.values.astype(np.float32,
                                                             copy=False),
                                           key=key)

                tmp_dict = {}
                count += 1

            except StopIteration:
                break
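zip() above silently assumes both archives enumerate the utterances in the same order. A sketch of an order-independent join that loads the alignments into a dict first (the same idea Example #11 uses); merge_order_independent is an illustrative helper:

import kaldi_io

def merge_order_independent(feats_path, ali_path):
    # load all alignments up front, then join by utterance key
    ali = dict(kaldi_io.read_ali_ark(ali_path))
    merged = {}
    for key, mat in kaldi_io.read_mat_ark(feats_path):
        if key in ali and mat.shape[0] == ali[key].shape[0]:
            merged[key] = (mat, ali[key])
    return merged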
Example #9
def write_ark_scp(output_name, kmat_dict: dict):
    ark_scp_output = f"ark:| copy-feats --compress=true ark:- ark,scp:{output_name}.ark,{output_name}.scp"
    with open_or_fd(ark_scp_output, "wb") as f:
        for k, mat in kmat_dict.items():
            write_mat(f, mat, key=k)
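A usage sketch: because the output string starts with "ark:| ...", open_or_fd pipes the stream into Kaldi's copy-feats binary, which therefore has to be on PATH; the path and data below are illustrative:

import numpy as np

feats = {'utt1': np.random.randn(100, 40).astype(np.float32)}
write_ark_scp('/tmp/feats', feats)  # writes /tmp/feats.ark and /tmp/feats.scp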
Example #10
        cv_feat_keys, cv_feats = featureUtils.read_feats(Config.CV_FEATS)
        cv_ali_keys, cv_alis, _ = featureUtils.read_ali_and_compute_prior(
            Config.CV_ALIGNMENTS, None)

        utils.verify_order(cv_feat_keys, cv_ali_keys)
        eval_loss, eval_ce = dnn.eval(sess, cv_feats, cv_alis)
        print('validation loss after model restore:', eval_loss, 'xent:', eval_ce)

        print('Forward pass test data')

        te_key, te_mat, te_feat_len = featureUtils.read_feats(
            Config.TEST_FEATS, True)
        out = dnn.forwardPass(sess, te_mat)
        out = np.array(out)
        # loading prior
        prior = np.loadtxt(
            '/speech1/DIT_PROJ/srini/PycharmProjects/tfkaldi-fork/prior.npy',
            dtype=np.float32)
        out /= prior
        # np.where is not in-place: assign the result back before the log
        out = np.where(out == 0, np.finfo(float).eps, out)
        out = np.log(out)
        prev = 0
        with open('out2.ark', 'wb') as f:
            for ran in range(len(te_key)):
                temp = out[0][prev:prev + te_feat_len[ran]]
                prev = prev + te_feat_len[ran]
                kaldi_io.write_mat(f, temp, key=te_key[ran])

    end_time = time.time()
    print('seconds', (end_time - begin_time))
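The prev/offset bookkeeping in the write loop can also be expressed with np.split over the cumulative frame counts; write_split is an illustrative helper assuming the same te_key/te_feat_len bookkeeping:

import numpy as np
import kaldi_io

def write_split(path, keys, frame_counts, out_mat):
    # split the concatenated posteriors back into per-utterance chunks
    bounds = np.cumsum(frame_counts)[:-1]
    with open(path, 'wb') as f:
        for key, chunk in zip(keys, np.split(out_mat, bounds)):
            kaldi_io.write_mat(f, chunk, key=key)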
Example #11
    def concat_data(self, path_data, path_phonemes, output_folder):
        dataset = DataIterator(self._nj,
                               path_data,
                               splice=self._splice,
                               cmvn=self._cmvn)

        create_stats = True
        if path_data in ['test', 'dev']:
            create_stats = False

        # set dim
        dim = self._dim * (2 * self._splice + 1)

        print('Loading alignment dict')
        alignment_dict = {}
        for key, mat in kaldi_io.read_ali_ark(path_phonemes):
            alignment_dict[key] = mat

        print('Loading done')
        count = 1
        tmp_dict = {}
        # gather_stats: n, mean, var (nj rows)
        gather_stats = np.zeros([self._nj, 2 * dim + 1])
        gather_data = []
        print('Creating filtered training data and merge them with the labels')
        while True:
            try:
                for key, mat in kaldi_io.read_mat_ark(dataset.next_file()):
                    # we need to filter the training data because we do not
                    # have alignments for all of it; frames without alignments
                    # must not be used for training our HMMs
                    # TODO Could also work with --> check performance difference
                    if key in alignment_dict and \
                            mat.shape[0] == alignment_dict[key].shape[0]:
                        tmp_dict[key] = pd.concat(
                            [pd.DataFrame(mat),
                             pd.DataFrame(alignment_dict[key])],
                            axis=1)
                        gather_data.append(mat)

                od = collections.OrderedDict(sorted(tmp_dict.items()))

                # write filtered training data and the labels to files
                with open(output_folder + '/feats_vq_' + str(count),
                          'wb') as f:
                    for key, mat in list(od.items()):
                        kaldi_io.write_mat(f,
                                           mat.values.astype(np.float32,
                                                             copy=False),
                                           key=key)
                # write the filtered training data
                with open(output_folder + '/features_' + str(count),
                          'wb') as f:
                    for key, mat in list(od.items()):
                        kaldi_io.write_mat(
                            f,
                            mat.values.astype(np.float32, copy=False)[:, :dim],
                            key=key)
                tmp_dict = {}
                # stats for this file: row = [num_frames, mean(dim), var(dim)]
                tmp_data = np.concatenate(gather_data)
                gather_stats[count - 1, 0] = tmp_data.shape[0]
                gather_stats[count - 1, 1:(dim + 1)] = np.mean(tmp_data, axis=0)
                gather_stats[count - 1, (dim + 1):] = np.var(tmp_data, axis=0)
                # print(gather_stats)
                count += 1
                gather_data = []  # reset gather_data

            except StopIteration:
                if create_stats:
                    print('Saving std and mean of data to stats.mat')
                    self._create_and_save_stats(gather_stats, output_folder)
                break
Example #12
    def vq_data(self, nj, data_folder, output_folder):
        # vq the training data with the trained codebook
        assert self.codebook.shape[0] > 0
        print('VQing training data...')

        dataset = DataIterator(nj, data_folder)

        keys = []
        dict_vq, dict_indicies = {}, {}
        if self._multiple:
            keys = ['energy', 'raw', 'delta', 'dd']
            dict_indicies = {
                'energy': [0, 13, 26],
                'raw': range(1, 13, 1),
                'delta': range(14, 26, 1),
                'dd': range(27, 39, 1)
            }
        else:
            keys = ['simple']
            dict_indicies = {'simple': range(0, 39)}

        for key in keys:
            dict_vq[key] = self.codebook[:, dict_indicies[key]]

        tmp_dict = {}
        labels_all = []
        phoneme_all = []
        count = 1
        while True:
            try:
                data_path = dataset.next_file()
                print("Data path is in ", data_path)
                for key, mat in kaldi_io.read_mat_ark(data_path):
                    if self._multiple:
                        # quantize each sub-stream against its own codebook
                        # and stack the resulting label columns
                        labels = [
                            vq(whiten(mat[:, dict_indicies[k]]),
                               dict_vq[k])[0][:, np.newaxis] for k in keys
                        ]
                        df = pd.DataFrame(np.concatenate(labels, axis=1))
                    else:
                        obs = whiten(mat[:, :39]) if self._whitening \
                            else mat[:, :39]
                        df = pd.DataFrame(
                            vq(obs, dict_vq['simple'])[0][:, np.newaxis])
                        labels_all.append(df.values)

                        if np.shape(mat)[1] > 39:
                            phoneme_all.append(mat[:, 39])

                    # add to tmp_dict for later saving
                    tmp_dict[key] = df

                # ordered dict
                od = collections.OrderedDict(sorted(tmp_dict.items()))

                # save label-stream from vq
                with open(output_folder + '/feats_vq_' + str(count),
                          'wb') as f:
                    for key, mat in list(od.items()):
                        kaldi_io.write_mat(f,
                                           mat.values.astype(np.float32,
                                                             copy=False),
                                           key=key)

                tmp_dict = {}
                count += 1

            except StopIteration:
                # calc MI
                if False:
                    misc = Misc()
                    labels_all = np.concatenate(labels_all)
                    # labels_all = np.reshape(labels_all, [np.shape(labels_all)[0] * np.shape(labels_all)[1]],
                    #                         np.shape(labels_all)[2])
                    phoneme_all = np.concatenate(phoneme_all)
                    # phoneme_all = np.reshape(phoneme_all, [np.shape(phoneme_all)[0] * np.shape(phoneme_all)[1]],
                    #                          np.shape(phoneme_all)[2])
                    print(misc.calculate_mi(labels_all, phoneme_all))
                break
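The quantization itself is scipy's classic vector-quantization interface: whiten rescales each column to unit variance, and vq returns (code indices, distortions) against a codebook. A minimal sketch on random data; kmeans appears only to make the sketch self-contained, the class above obtains its codebooks elsewhere:

import numpy as np
from scipy.cluster.vq import kmeans, vq, whiten

obs = whiten(np.random.randn(500, 39))  # unit-variance columns
codebook, _ = kmeans(obs, 16)           # 16 cluster centers
codes, dists = vq(obs, codebook)        # nearest-center index per frame
print(codes.shape, codes.min(), codes.max())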
Example #13
    def do_inference(self, nj, input_folder, output_folder):
        """
        Does the inference of the model

        :param nj:              number of jobs (how the dataset is split in kaldi)
        :param input_folder:    path to the data folder to do the inference
        :param output_folder:   path to save the output of the inference
        """

        # create DataIterator for iterate through the split folder created by kaldi
        dataset = DataIterator(nj,
                               input_folder,
                               splice=self._splice,
                               cmvn=self._cmvn)

        dim = self._dim * (2 * self._splice + 1)

        # counting iterator, used to number the output files when writing
        iterator = iter(range(1, dataset.get_size() + 1))

        features_all = {}
        phoneme_all = {}
        inferenced_data = {}  # storing the inferenced data
        check_data = {}
        output_all = {}

        while True:
            try:
                data_path = dataset.next_file()  # get path to data
                # print(data_path)
                # iterate through data
                for key, mat in kaldi_io.read_mat_ark(data_path):
                    # run the inference once per utterance and reuse the result
                    tmp = self._do_single_inference(mat[:, :dim])
                    inferenced_data[key] = tmp
                    # check_data[key] = [np.argmax(tmp[0], axis=1), np.argmax(tmp[1], axis=1),
                    #                    np.argmax(tmp[2], axis=1), self._dev_alignments[key]]
                    # gather statistics for mi (only if the input contains
                    # data + labels), for debugging
                    if np.shape(mat)[1] > dim:
                        phoneme_all[key] = mat[:, dim]
                    # add for debugging, see below
                    output_all[key] = tmp

                od = collections.OrderedDict(sorted(inferenced_data.items()))

                # write posteriors (inferenced data) to files
                with open(output_folder + '/feats_vq_' + str(next(iterator)),
                          'wb') as f:
                    for key, mat in list(od.items()):
                        if self.transform:
                            kaldi_io.write_mat(f, mat, key=key)
                        else:
                            kaldi_io.write_mat(f, mat[:, np.newaxis], key=key)
                inferenced_data = {}  # reset dict

            except StopIteration:
                # debugging
                # gather_right = np.zeros(127)
                # gather_right.fill(1e-5)
                # gather_wrong = np.zeros(127)
                # gather_wrong.fill(1e-5)
                # gather_vq = np.zeros(127)
                # gather_vq.fill(1e-5)
                # gather_comb = np.zeros(127)
                # gather_comb.fill(1e-5)
                #
                # for key, entry in check_data.items():
                #     tmp_van = entry[0] == entry[3]  # right pred of vanilla
                #     tmp_vq = entry[1] == entry[3]  # right pred of vanilla
                #     tmp_comb = entry[2] == entry[3]  # right pred of vanilla
                #
                #     # np.max(np.expand_dims(~tmp_vq, 1) * output_all[key], axis=1)
                #
                #     comb_right = [t for t, x in enumerate(tmp_comb) if x]
                #     comb_wrong = [t for t, x in enumerate(~tmp_comb) if x]
                #     vq_right = [t for t, x in enumerate(tmp_vq) if x]
                #     vq_wrong = [t for t, x in enumerate(~tmp_vq) if x]
                #     van_right = [t for t, x in enumerate(tmp_van) if x]
                #     van_wrong = [t for t, x in enumerate(~tmp_van) if x]
                #
                #     list_vq = ~(entry[0] == entry[3]) == (entry[1] == entry[3])
                #     list_comb = (entry[0] == entry[3]) == ~(entry[2] == entry[3])
                #     ind_vq_true = [t for t, x in enumerate(list_vq) if x]
                #     ind_comb_true = [t for t, x in enumerate(list_comb) if x]
                #     ind_vq_false = [t for t, x in enumerate(list_vq) if not x]
                #     # est = output_all[key][1]
                #
                #
                #     # plt.subplot(2, 1, 1)
                #     # # plt.hist(np.ndarray.flatten(np.expand_dims(list_vq, 1) * output_all[key]), bins=100, range=[1e-15, 1])
                #     # plt.hist(-np.sum(np.log2(output_all[key][0]) * output_all[key][0], axis=1), bins=10)
                #     # plt.subplot(2, 1, 2)
                #     # # plt.hist(np.ndarray.flatten(np.expand_dims(~list_vq, 1) * output_all[key]), bins=100, range=[1e-15, 1])
                #     # plt.hist(-np.sum(np.log2(output_all[key][1]) * output_all[key][1], axis=1), bins=10)
                #     # plt.show()
                #
                #     print('right comb: ' + str(len(comb_right)))
                #     print('wrong comb: ' + str(len(comb_wrong)))
                #     print('right vq: ' + str(len(vq_right)))
                #     print('wrong vq: ' + str(len(vq_wrong)))
                #     print('right van: ' + str(len(van_right)))
                #     print('wrong van: ' + str(len(van_wrong)))
                #     # print(len(van_right) + len(van_wrong))
                #     # print(entry[2][van_wrong])
                #     gather_right[entry[3][comb_right]] += 1.0
                #     gather_wrong[entry[3][comb_wrong]] += 1.0
                #     gather_vq[entry[3][ind_vq_true]] += 1.0
                #     gather_comb[entry[3][ind_comb_true]] += 1.0
                #     # print(len(van_right) + len(van_wrong))
                #     # print(len(entry[2]))
                #     print(sum(list_comb) / len(entry[3]))
                #     print(sum(list_vq) / len(entry[3]))

                # plt.subplot(3, 1, 1)
                # plt.bar(range(0, 127), gather_right)
                # plt.subplot(3, 1, 2)
                # plt.bar(range(0, 127), gather_wrong)
                # plt.subplot(3, 1, 3)
                # plt.bar(range(0, 127), gather_vq)
                # plt.show()
                # print(check_data[0] == check_data[1])
                if False:
                    misc = Misc()
                    features_all = np.concatenate(features_all)
                    phoneme_all = np.expand_dims(np.concatenate(phoneme_all),
                                                 1)
                    phoneme_all = misc.trans_vec_to_phones(phoneme_all)
                    # print(misc.calculate_mi(features_all, phoneme_all))
                    mi, test_py, test_pw, test_pyw = self._session.run(
                        ["mutual_info:0", "p_y:0", "p_w:0", "p_yw:0"],
                        feed_dict={
                            "is_train:0": False,
                            "ph_features:0": features_all,
                            "ph_labels:0": phoneme_all
                        })
                    print(mi)
                    tmp_pywtest = pd.DataFrame(test_py)
                    tmp_pywtest.to_csv('py_inf.txt', header=False, index=False)
                    tmp_pywtest = pd.DataFrame(test_pw)
                    tmp_pywtest.to_csv('pw_inf.txt', header=False, index=False)
                    tmp_pywtest = pd.DataFrame(test_pyw)
                    tmp_pywtest.to_csv('pwy_inf.txt',
                                       header=False,
                                       index=False)

                break