Example #1
 def hdf2k(self):
     """Converts hdf5 format to kaldi matrix."""
     with h5py.File(self.options.net_output, "r") as i_file:
         # kaldi_io writes binary data, so open the output in "wb" mode
         with open(self.options.output_file, "wb") as o_file:
             for key, item in i_file.items():
                 mat = item["data"]
                 # h5py 3.x removed Dataset.value; [()] reads the full array
                 kaldi_io.write_mat(o_file, mat[()], key)
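
A quick sanity check for this conversion (a minimal sketch; the paths and the HDF5 layout are assumptions based on the code above) is to read the ark back and compare it with the HDF5 contents:

import h5py
import kaldi_io
import numpy as np

def check_hdf2k(h5_path, ark_path):
    """Compare a converted ark against the original HDF5 file."""
    with h5py.File(h5_path, "r") as i_file:
        for key, mat in kaldi_io.read_mat_ark(ark_path):
            # write_mat stores float32, so compare with a small tolerance
            assert np.allclose(mat, i_file[key]["data"][()], atol=1e-5), key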
Example #2
def main():
    logging.basicConfig(format='[%(filename)s:%(lineno)d] %(message)s',
                        level=logging.WARN)
    parser = argparse.ArgumentParser(
        description="load .rec rawfile from first argument SCPPATH, \
            and convert all rec to scpfile-contained-utterence-id-indexed ark file to ARKPATH"
    )
    parser.add_argument("SCPPATH", help="scp file path")
    parser.add_argument("ARKPATH", help="ark file path")
    parser.add_argument("OUTPUTTYPE", help="raw | diff | baseline")
    args = parser.parse_args()
    logging.debug(args)
    SCP_FILEPATH = args.SCPPATH
    ARK_FILEPATH = args.ARKPATH
    OUTPUT_TYPE = args.OUTPUTTYPE
    if not ARK_FILEPATH.endswith('.ark') or not SCP_FILEPATH.endswith('.scp'):
        logging.error('SCPPATH must end with .scp and ARKPATH with .ark')
        exit()
    logging.debug(SCP_FILEPATH)
    uttid2recpath = io_helper.loadscp(SCP_FILEPATH)
    logging.debug(uttid2recpath)
    dataset = loaddata(uttid2recpath, OUTPUT_TYPE)
    with kaldi_io.open_or_fd(ARK_FILEPATH, 'wb') as f:
        for k, m in dataset.items():
            kaldi_io.write_mat(f, m, k)
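
io_helper.loadscp is not shown here; judging by its use it returns a dict mapping utterance ids to .rec paths. A minimal sketch of such a loader (the two-column 'uttid path' format is an assumption):

def loadscp(scp_path):
    """Parse 'uttid path' lines into a dict."""
    uttid2path = {}
    with open(scp_path) as f:
        for line in f:
            uttid, path = line.split(None, 1)
            uttid2path[uttid] = path.strip()
    return uttid2path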
Example #3
def export2kaldi(databyid):
    kaldidata = databyid
    filter_field = ('Click1', 'Hush', 'Click2', 'Click3')  #, 'LongPress')
    #exclude_field = ('LongPress')
    test_size = 0.2
    trainset = []
    testset = []
    filtered_kaldidata = {}
    for k, v in kaldidata.items():
        label = k.split('_')[0]
        if label in filter_field:
            #if label not in exclude_field:
            if label not in filtered_kaldidata:
                filtered_kaldidata[label] = []
            filtered_kaldidata[label].append((k, v))
    for label in filtered_kaldidata:
        length = len(filtered_kaldidata[label])
        random.shuffle(filtered_kaldidata[label])
        testset.extend(filtered_kaldidata[label][:int(length * test_size)])
        trainset.extend(filtered_kaldidata[label][int(length * test_size):])
    with kaldi_io.open_or_fd('feats_train.ark', 'wb') as f:
        for k, m in trainset:
            kaldi_io.write_mat(f, m, k)
    with kaldi_io.open_or_fd('feats_test.ark', 'wb') as f:
        for k, m in testset:
            kaldi_io.write_mat(f, m, k)
Example #5
def main():
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
    logging.info("CUDA_VISIBLE_DEVICES=" + os.environ.get("CUDA_VISIBLE_DEVICES", ""))
    logging.info("HOST=" + os.environ.get("HOST", ""))
    logging.info("SLURM_JOB_ID=" + os.environ.get("SLURM_JOB_ID", ""))

    model_dir = Path(args.model_dir)
    forward_dir = model_dir / "forward"
    aux_scp = kaldiio.load_scp(args.aux_scp)
    model = torch.load(model_dir / "model.pickle", map_location="cpu")
    model.eval()
    with torch.no_grad(), open(args.forward_ark, "wb") as f:
        for key, feat in kaldi_io.read_mat_ark(args.input_rs):
            aux = torch.from_numpy(aux_scp[key])
            logging.info("input: key={} feat={} aux={}".format(key, feat.shape, aux.shape))
            # feat is (time, freq) shape
            x = torch.from_numpy(feat.T).unsqueeze(0)
            if x.shape[2] < args.min_time_width:
                remain = args.min_time_width - x.shape[2] + 1
                # integer division; the right pad gets the extra frame when remain is odd
                lpad = torch.zeros(1, x.shape[1], remain // 2)
                rpad = torch.zeros(1, x.shape[1], remain - remain // 2)
                x = torch.cat((lpad, x, rpad), dim=2)

            n_aux = aux.shape[0]
            # take center ivector frame
            if args.use_last_ivector:
                aux = aux[-1].unsqueeze(0)
            else:
                aux = aux[n_aux//2].unsqueeze(0)
            # forward
            y, _ = model(x, aux)
            y = torch.nn.functional.log_softmax(y, dim=1).squeeze(0)
            logging.info("output: {}".format(y.shape))
            kaldi_io.write_mat(f, y.numpy().T, key)
Example #6
def npz2ark(npz_file, ark_file):
    '''load npz format and save as kaldi ark format'''
    print("Loading npz file...")
    npz = np.load(npz_file)
    feats = npz['feats']
    utt_label = npz['utt_label']

    utters = np.unique(utt_label)
    utt_data = {}
    for i in utters:
        utt_data[i] = []
    for i in range(len(feats)):
        key = utt_label[i]
        utt_data[key].append(feats[i])

    if not os.path.exists(os.path.dirname(ark_file)):
        os.makedirs(os.path.dirname(ark_file))

    pbar = tqdm(total=len(utt_data))
    with open(ark_file, 'wb') as f:
        for utt, data in utt_data.items():
            data = np.array(data)
            kaldi_io.write_mat(f, data, utt)
            pbar.update(1)
            pbar.set_description('generate utter {} of frames {}'.format(
                utt, data.shape[0]))
    pbar.close()
    print("Convert {} to {} ".format(npz_file, ark_file))
def extract(scp_file: str,
            output_ark: str,
            sr: int = None,
            win_length: int = 25,
            hop_length: int = 10,
            n_fft: int = 512):
    # Win length and hop_length are given in ms
    # scpfile is formatted as KEY VALUE where key is name and value is .wav filepath
    with open(scp_file, 'r') as rp, open(output_ark, 'wb') as wp:
        for line in tqdm(rp, total=get_num_lines(scp_file)):
            key, fpath = line.split()[:2]
            # Also supports 24 bit
            y, file_sr = sf.read(fpath, dtype='float32')
            # y, file_sr = librosa.load(fpath, sr=sr)
            # Adjust window length
            cur_win_length = file_sr * win_length // 1000
            cur_hop_length = file_sr * hop_length // 1000
            S = librosa.core.stft(y,
                                  n_fft=n_fft,
                                  win_length=cur_win_length,
                                  hop_length=cur_hop_length)
            S = np.log(np.abs(S)**2 + 1e-12)
            feature = S.transpose()
            #  components and activations
            kaldi_io.write_mat(wp, feature, key=key)
Example #8
def run_kaldi(command, input_type, input_value):
    """Run provided Kaldi command, pass a tensor and get the resulting tensor

    Args:
        input_type: str
            'ark' or 'scp'
        input_value:
            Tensor for 'ark'
            string for 'scp' (path to an audio file)
    """
    import kaldi_io

    key = 'foo'
    process = subprocess.Popen(command,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE)
    if input_type == 'ark':
        kaldi_io.write_mat(process.stdin, input_value.cpu().numpy(), key=key)
    elif input_type == 'scp':
        process.stdin.write(f'{key} {input_value}'.encode('utf8'))
    else:
        raise NotImplementedError('Unexpected type')
    process.stdin.close()
    result = dict(kaldi_io.read_mat_ark(process.stdout))['foo']
    return torch.from_numpy(result.copy())  # copy suppresses some torch warning
Example #9
 def testMatrixReadWrite(self):
     """
     Test read/write for float matrices.
     """
     # read,
     flt_mat = {
         k: m
         for k, m in kaldi_io.read_mat_scp('tests/data/feats_ascii.scp')
     }  # ascii-scp,
     flt_mat2 = {
         k: m
         for k, m in kaldi_io.read_mat_ark('tests/data/feats_ascii.ark')
     }  # ascii-ark,
     flt_mat3 = {
         k: m
         for k, m in kaldi_io.read_mat_ark('tests/data/feats.ark')
     }  # binary-ark,
     # store,
     with kaldi_io.open_or_fd('tests/data_re-saved/mat.ark', 'wb') as f:
         for k, m in flt_mat3.items():
             kaldi_io.write_mat(f, m, k)
     # read and compare,
     for k, m in kaldi_io.read_mat_ark('tests/data_re-saved/mat.ark'):
         self.assertTrue(np.array_equal(m, flt_mat3[k]),
                         msg="flt. matrix same after re-saving")
Example #10
def npz2ark(npz_path, kaldi_dir):
    print("loading...")
    npz = np.load(npz_path)
    feats = npz['feats']
    utt_label = npz['utt_label']
    utt_class = np.unique(utt_label)
    utt_data = {}
    for i in utt_class:
        utt_data[i] = []
    for i in range(len(feats)):
        key = utt_label[i]
        utt_data[key].append(feats[i])

    if not os.path.exists(kaldi_dir):
        os.makedirs(kaldi_dir)
    ark_path = kaldi_dir + os.sep + 'feats.ark'

    print("ark writing...")
    pbar = tqdm(total=len(utt_data))
    with open(ark_path, 'wb') as f:
        for utt, data in utt_data.items():
            data = np.array(data)
            kaldi_io.write_mat(f, data, utt)
            pbar.update(1)
            pbar.set_description('generate utter {} of frames {}'.format(
                utt, data.shape[0]))
    pbar.close()
    print("successfully save kaldi ark in {}".format(ark_path))
Example #11
def forward_dct(args,
                cpc_model,
                device,
                data_loader,
                output_ark,
                output_scp,
                dct_dim=24):
    ''' forward with dct '''

    logger.info("Starting Forward Passing")
    cpc_model.eval()  # not training cpc model

    ark_scp_output = 'ark:| copy-feats --compress=true ark:- ark,scp:' + output_ark + ',' + output_scp
    with torch.no_grad():
        with ko.open_or_fd(ark_scp_output, 'wb') as f:
            for [utt_id, data] in data_loader:
                data = data.float().unsqueeze(1).to(
                    device)  # add channel dimension
                data = data.contiguous()
                hidden = cpc_model.init_hidden(len(data))
                output, hidden = cpc_model.predict(data, hidden)
                mat = output.squeeze(
                    0).cpu().numpy()  # kaldi io does not accept torch tensor
                dct_mat = fft.dct(mat, type=2, n=dct_dim)  # apply dct
                ko.write_mat(f, dct_mat, key=utt_id[0])
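
The fft module above is presumably scipy.fftpack (or scipy.fft); with n=dct_dim the DCT truncates or zero-pads along the last axis, so a (frames, hidden_dim) matrix becomes (frames, dct_dim). A quick check of that assumption:

import numpy as np
from scipy import fftpack as fft

mat = np.random.randn(10, 256).astype(np.float32)
dct_mat = fft.dct(mat, type=2, n=24)  # DCT-II along the last axis
print(dct_mat.shape)  # (10, 24)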
Example #12
def make_feature(wav_path_list,
                 noise_wav_list,
                 feat_dir,
                 thread_num,
                 argument=False,
                 repeat_num=1):
    mag_ark_scp_output = 'ark:| copy-feats --compress=true ark:- ark,scp:{0}/feats{1}.ark,{0}/feats{1}.scp'.format(
        feat_dir, thread_num)
    ang_ark_scp_output = 'ark:| copy-feats --compress=true ark:- ark,scp:{0}/angles{1}.ark,{0}/angles{1}.scp'.format(
        feat_dir, thread_num)
    if argument:
        fwrite = open(os.path.join(feat_dir, 'db' + str(thread_num)), 'a')
    f_mag = kaldi_io.open_or_fd(mag_ark_scp_output, 'wb')
    f_ang = kaldi_io.open_or_fd(ang_ark_scp_output, 'wb')
    print("进入num循环")
    for num in range(repeat_num):
        for tmp in wav_path_list:
            uttid, wav_path = tmp
            clean = load_audio(wav_path)
            y = None
            print("argument = ", argument)
            while y is None:
                if argument:
                    print("argument=True")
                    noise_path = choice(noise_wav_list)
                    n = load_audio(noise_path[0])
                    db = np.random.uniform(low=0, high=20)
                    y = MakeMixture(clean, n, db)
                    uttid_new = uttid + '__mix{}'.format(num)
                    print(uttid_new + ' ' + str(db) + '\n')
                    fwrite.write(uttid_new + ' ' + str(db) + '\n')
                else:

                    y = clean
                    uttid_new = uttid
            # STFT
            print("y = ", y)
            if y is not None:
                D = librosa.stft(y,
                                 n_fft=512,
                                 hop_length=256,
                                 win_length=512,
                                 window=scipy.signal.hamming)
                spect = np.abs(D)
                angle = np.angle(D)
                print("创建STFT")
                ##feat = np.concatenate((spect, angle), axis=1)
                ##feat = feat.transpose((1, 0))
                kaldi_io.write_mat(f_mag,
                                   spect.transpose((1, 0)),
                                   key=uttid_new)
                kaldi_io.write_mat(f_ang,
                                   angle.transpose((1, 0)),
                                   key=uttid_new)
            else:
                print(noise_path, tmp, 'error')

    f_mag.close()
    f_ang.close()
    if argument:
        fwrite.close()
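
load_audio and MakeMixture are not defined in this snippet. MakeMixture presumably scales the noise so that the clean-to-noise power ratio matches the requested SNR in dB and returns None when mixing is not possible (e.g. the noise clip is shorter than the speech), which is what the retry loop above relies on. A minimal sketch under those assumptions:

import numpy as np

def MakeMixture(clean, noise, snr_db):
    """Mix clean speech with noise at the given SNR (assumed semantics)."""
    if noise is None or len(noise) < len(clean):
        return None  # caller retries with another noise clip
    noise = noise[:len(clean)]
    clean_power = np.mean(clean ** 2)
    noise_power = np.mean(noise ** 2) + 1e-12
    # choose scale so that 10*log10(clean_power / (scale**2 * noise_power)) == snr_db
    scale = np.sqrt(clean_power / (noise_power * 10 ** (snr_db / 10.0)))
    return clean + scale * noise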
Example #13
def write_kaldi(orig_feat_scp, ark_scp_output, max_len):
    """Write the slice feature matrix to ark_scp_output
    """
    with ko.open_or_fd(ark_scp_output,'wb') as f:
        for key,mat in ko.read_mat_scp(orig_feat_scp):
            tensor = tensor_cnn_utt(mat, max_len)
            if tensor.shape[1] != max_len:
                print(tensor.shape)
            ko.write_mat(f, tensor, key=key)
Example #14
def write_mat(filename, temp_dict):
    try:
        with open(filename, 'wb') as f:
            for key, mat in temp_dict.items():
                kaldi_io.write_mat(f, mat, key)
        return True
    except Exception as e:
        print(str(e))
        return False
Example #15
def decode(args, dataset, model, priors, device='cpu'):
    '''
        Produce lattices from the input utterances.
    '''
    # This is all of the kaldi code we are calling. We are just piping our
    # features to latgen-faster-mapped, which does all of the lattice
    # generation.
    lat_output = '''ark:| copy-feats ark:- ark:- |\
    latgen-faster-mapped --min-active={} --max-active={} \
    --max-mem={} \
    --lattice-beam={} --beam={} \
    --acoustic-scale={} --allow-partial=true \
    --word-symbol-table={} \
    {} {} ark:- ark:- | lattice-scale --acoustic-scale={} ark:- ark:- |\
    gzip -c > {}/lat.{}.gz'''.format(args.min_active, args.max_active,
                                     args.max_mem, args.lattice_beam,
                                     args.beam, args.acoustic_scale,
                                     args.words_file, args.trans_mdl,
                                     args.hclg, args.post_decode_acwt,
                                     args.dumpdir, args.job)

    # Do the decoding (dumping senone posteriors)
    model.eval()
    with torch.no_grad():
        with kaldi_io.open_or_fd(lat_output, 'wb') as f:
            utt_mat = []
            prev_key = b''
            generator = evaluation_batches(dataset)
            # Each minibatch is guaranteed to have at most 1 utterance. We need
            # to append the output of subsequent minibatches corresponding to
            # the same utterances. These are stored in ``utt_mat'', which is
            # just a buffer to accumulate the posterior outputs of minibatches
            # corresponding to the same utterance. The posterior state
            # probabilities are normalized (subtraction in log space) by the
            # log priors in order to produce pseudo-likelihoods usable
            # for lattice generation with latgen-faster-mapped.
            for key, mat in decode_dataset(args,
                                           generator,
                                           model,
                                           device='cpu',
                                           output_idx=args.output_idx):
                if len(utt_mat) > 0 and key != prev_key:
                    kaldi_io.write_mat(f,
                                       np.concatenate(utt_mat,
                                                      axis=0)[:utt_length, :],
                                       key=prev_key.decode('utf-8'))
                    utt_mat = []
                utt_mat.append(mat - args.prior_scale * priors)
                prev_key = key
                utt_length = dataset.utt_lengths[key] // dataset.subsample

            # Flush utt_mat buffer at the end
            if len(utt_mat) > 0:
                kaldi_io.write_mat(f,
                                   np.concatenate(utt_mat,
                                                  axis=0)[:utt_length, :],
                                   key=prev_key.decode('utf-8'))
Example #16
def construct_tensor(orig_feat_scp, ark_scp_output, truncate_len):
    with ko.open_or_fd(ark_scp_output, 'wb') as f:
        for key, mat in ko.read_mat_scp(orig_feat_scp):
            tensor = tensor_cnn_utt(mat, truncate_len)
            repetition = int(tensor.shape[1] / truncate_len)
            for i in range(repetition):
                sub_tensor = tensor[:, i * truncate_len:(i + 1) * truncate_len]
                new_key = key + '-' + str(i)
                ko.write_mat(f, sub_tensor, key=new_key)
Example #17
def extract_file(wav_lines, wfilename, winstep, winlen, mode):

    ark_scp_output = 'ark:| copy-feats ark:- ark,scp:%s.ark,%s.scp' %(wfilename, wfilename)
    with kaldi_io.open_or_fd(ark_scp_output, 'wb') as wf:
        for line in wav_lines:
            items = line.split()
            key = items[0]
            wav_ = items[5]
            mat = extract(wav_, winstep, winlen, mode)
            kaldi_io.write_mat(wf, mat, key=key)
Example #18
def outputfile_mat2ark(mat, filepath):
    logging.debug(filepath)
    if not filepath.endswith('.ark'):
        logging.error('extension error')
        return
    with kaldi_io.open_or_fd(filepath, 'wb') as f:
        for k, m in mat.items():
            if 0 in m.shape:
                continue
            kaldi_io.write_mat(f, m, k)
Example #19
def lplda_kaldi_wrapper(lda_dim, kaldi_scp, kaldi_utt2spk, lda_transform):
    
    data = read_kaldi_scp_flt(kaldi_scp)
    spk2utt = load_spk2utt(kaldi_utt2spk)

    # train_vecs = {}
    # for spkid in spk2utt.keys():
    #     train_vecs[spkid] = []  
    #     for uttid in spk2utt[spkid]:
    #         map_uttid = spkid[6:] + "_" + uttid + "_A"            
    #         if map_uttid in data.keys():
    #             train_vecs[spkid].append(data[map_uttid])

    train_vecs = {}
    for spkid in spk2utt.keys():
        train_vecs[spkid] = []
        uttid_uniq = []
        for uttid in spk2utt[spkid]:
            uttid_uniq.append(uttid)
        uttid_uniq = sorted(set(uttid_uniq))
        for uttid in uttid_uniq:
            if uttid in data.keys():
                train_vecs[spkid].append(data[uttid])

    ## get ids, vecs
    ids, vecs = get_lambda_ids_and_vecs(train_vecs)
    int_ids = label_str_to_int(ids)
    print ("lplda, ", len(vecs), len(vecs[0]))

    ## compute and sub mean
    m = np.mean(vecs, axis=0)
    vecs = vecs - m
    
    ## lplda
    lda = LocalPairwiseLinearDiscriminantAnalysis(n_components=lda_dim)
    lda.fit(np.asarray(vecs), np.asarray(int_ids))

    ## compute mean
    dim = len(m)
    transform_m = lda.transform(np.reshape(m, (1, dim)))
    
    # copy to kaldi format
    transform = np.zeros([lda_dim, dim + 1], float)
    lda_trans = lda.scalings_.T[:lda_dim, :]
    # m_trans = np.dot(lda_trans, m)
    for r in range(lda_dim):
        for c in range(dim):
            transform[r][c] = lda_trans[r][c]
        transform[r][dim] = -1.0 * transform_m[0][r]

    ## save lda transform
    kaldi_io.write_mat(lda_transform, transform)

    return
Example #20
def main():
    args = get_args()
    inp_feats_scp = args.inp_feats_scp
    out_feats_ark = args.out_feats_ark
    ark_scp_output = 'ark:| copy-feats --compress=true ark:- ark,scp:{p}.ark,{p}.scp'.format(
        p=out_feats_ark)
    with kaldi_io.open_or_fd(ark_scp_output, 'wb') as f:
        for utt, feats in kaldi_io.read_mat_scp(inp_feats_scp):
            mfcc = convert_mfcc_to_fbank(feats)
            np.save('ark_check4/{u}.npy'.format(u=utt), mfcc)
            kaldi_io.write_mat(f, mfcc, key=utt)
Example #21
def test(testloader, model, output_file, use_cuda):
    # switch to test mode
    model.eval()
    with open(output_file, 'wb') as f:
        for i, (inputs, input_lengths, utt_ids) in enumerate(testloader):
            lprobs, output_lengths = model(inputs, input_lengths)
            for j in range(inputs.size(0)):
                output_length = output_lengths[j]
                utt_id = utt_ids[j]
                kaldi_io.write_mat(
                    f, (lprobs[j, :output_length, :]).cpu().detach().numpy(),
                    key=utt_id)
Example #22
def main(args):
    keys, mat_list = extract_adv_voiced_feats(args.grads, args.vad,
                                              args.ori_feats, args.sigma)
    adv_featsfile = 'ark:| copy-feats ark:- ark,scp:data/voxceleb1_test/spoofed_voiced_feats_sigma' + str(
        args.sigma
    ) + '.ark,data/voxceleb1_test/spoofed_voiced_feats_sigma' + str(
        args.sigma) + '.scp'
    utts_done = 0

    with kaldi_io.open_or_fd(adv_featsfile, 'wb') as f:
        for key, mat in zip(keys, mat_list):
            kaldi_io.write_mat(f, mat, key=key)
            utts_done += 1
    print('%d done.' % (utts_done))
Example #24
def _run_kaldi(command, input_tensor):
    """Run provided Kaldi command, pass a tensor and get the resulting tensor

    Assumption:
        The provided Kaldi command consumes one ark and produces one ark.
        i.e. 'ark:- ark:-'
    """
    process = subprocess.Popen(command,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE)
    kaldi_io.write_mat(process.stdin, input_tensor.numpy(), key='foo')
    process.stdin.close()
    result = dict(kaldi_io.read_mat_ark(process.stdout))['foo']
    return torch.from_numpy(result.copy())  # copy suppresses some torch warning
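
Assuming the Kaldi binaries are on PATH, a pass-through command such as copy-feats (which reads one ark from stdin and writes one to stdout, matching the 'ark:- ark:-' assumption above) round-trips a matrix:

import torch

mat = torch.randn(100, 40)
out = _run_kaldi(['copy-feats', 'ark:-', 'ark:-'], mat)
assert torch.allclose(out, mat)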
Example #25
 def generate_feats(test_dir, num=10, seed=0):
     """generate feature matrices."""
     feats = {}
     np.random.seed(seed)
     with open(os.path.join(test_dir, 'feats.scp'), 'w',
               encoding='utf-8') as f:
         for i in range(num):
             utt_id = 'utt_id_' + str(i)
             ark_file = os.path.join(test_dir, 'mat_' + str(i) + '.ark')
             f.write(utt_id + ' ' + ark_file + ':0\n')
             length = np.random.randint(200, 800)
             m = np.random.uniform(-10.0, 10.0, (length, 40))
             feats[utt_id] = m
             kaldi_io.write_mat(ark_file, m)
     return feats
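
Because each scp line points at a single-matrix ark with offset 0, the generated features can be read straight back with read_mat_scp; a quick consistency check (calling generate_feats as a plain function, and assuming the test directory already exists):

import numpy as np
import kaldi_io

feats = generate_feats('tests/tmp_feats')
for utt_id, mat in kaldi_io.read_mat_scp('tests/tmp_feats/feats.scp'):
    # write_mat stores float32, so compare with a small tolerance
    assert np.allclose(mat, feats[utt_id], atol=1e-5)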
Example #26
 def generate_feats(test_dir, num=10, seed=0):
     """generate feature matrices."""
     expected_feats = {}
     np.random.seed(seed)
     utt_ids, rxfiles, utt2num_frames = [], [], []
     for i in range(num):
         utt_id = "utt_id_" + str(i)
         ark_file = os.path.join(test_dir, "mat_" + str(i) + ".ark")
         length = np.random.randint(200, 800)
         m = np.random.uniform(-10.0, 10.0, (length, 40))
         expected_feats[utt_id] = m
         kaldi_io.write_mat(ark_file, m)
         utt_ids.append(utt_id)
         rxfiles.append(ark_file + ":0")
         utt2num_frames.append(length)
     return expected_feats, utt_ids, rxfiles, utt2num_frames
Example #27
def extract(scp_file: str,
            output_ark: str,
            win_length: int = 25,
            hop_length: int = 10,
            n_fft: int = 512):

    file_key_path = getlines(scp_file)
    with open(output_ark, 'wb') as wp:
        for line in tqdm(file_key_path, ncols=100):
            feat = spec(line[1],
                        win_length=win_length,
                        hop_length=hop_length,
                        n_fft=n_fft)
            kaldi_io.write_mat(wp, feat, key=line[0])

    return
Example #28
def kang():
    for i in range(2):
        f_mag = 'ark:| copy-feats --compress=true ark:- ark,scp:file1.ark,file2.scp'

        audop_path = '/home/kang/Develop/Robust_e2e_gan/db/data_aishell/wav/dev/S0724/BAC009S0724W0121.wav'
        rate, sig = wav.read(audop_path)
        sig = sig.astype('float32')
        D = librosa.stft(sig,
                         n_fft=512,
                         hop_length=256,
                         win_length=512,
                         window=scipy.signal.hamming)
        spect = np.abs(D)
        uttid_new = "file2deID"

        kaldi_io.write_mat(f_mag, spect.transpose((1, 0)), key=uttid_new)
Example #29
def test(net,
         loader,
         device,
         *,
         write_posts=False,
         out_folder=None,
         count_file=None):
    if not write_posts:
        assert out_folder is None and count_file is None
    if write_posts:
        assert out_folder is not None and count_file is not None

    if write_posts:
        # set folder for posteriors ark
        post_file = kaldi_io.open_or_fd(out_folder + '/pout_test.ark', 'wb')
        counts = load_counts(count_file)

    errs = []
    losses = []
    lens = []
    net.eval()
    # Reading dev-set sentence by sentence
    for name, fea, lab in loader:
        inp = fea.to(device, dtype=torch.float)
        lab = lab.to(device)

        with torch.no_grad():
            loss, err, pout, pred = net(inp, lab)

        if write_posts:
            # writing the ark containing the normalized posterior probabilities (needed for kaldi decoding)
            kaldi_io.write_mat(
                post_file,
                pout.data.to('cpu').numpy() - np.log(counts / np.sum(counts)),
                name[0])

        losses.append(loss.item())
        errs.append(err.item())
        lens.append(inp.shape[0])

    avg_loss = sum(losses) / len(losses)
    avg_err = sum(errs) / sum(lens)

    if write_posts:
        post_file.close()

    return avg_loss, avg_err
Example #30
def recog(args):
    set_deterministic_pytorch(args)

    from espnet.asr.asr_utils import get_model_conf, torch_load
    # read training config
    idim, odim, train_args = get_model_conf(args.model, args.model_conf)

    # load trained model parameters
    model_class = dynamic_import(train_args.model_module)
    model = model_class(idim, odim, train_args)
    torch_load(args.model, model)
    # model, train_args = load_trained_model(args.model)
    model.recog_args = args

    # gpu
    if args.ngpu == 1:
        gpu_id = list(range(args.ngpu))
        logging.info("gpu id: " + str(gpu_id))
        model.cuda()

    # read json data
    with open(args.recog_json, "rb") as f:
        js = json.load(f)["utts"]

    load_inputs_and_targets = LoadInputsAndTargets(
        mode="asr",
        load_output=False,
        sort_in_input_length=False,
        preprocess_conf=None,
        preprocess_args={"train": False},
    )

    ark_file = open(args.result_ark, 'wb')
    if args.batchsize == 0:
        with torch.no_grad():
            for idx, name in enumerate(js.keys(), 1):
                logging.info("(%d/%d) decoding " + name, idx, len(js.keys()))
                batch = [(name, js[name])]

                feat = load_inputs_and_targets(batch)
                feat = (feat[0][0])

                hyps = model.recognize(feat)
                hyps = hyps.squeeze(0)
                hyps = hyps.data.numpy()

                write_mat(ark_file, hyps, key=name)
    ark_file.close()
Example #31
def main(args):
    cpath = os.getcwd()
    # keys, mat_list = extract_adv_mat(args.spoofed_feats, args.vad, args.ori_feats)
    # print(1+args.sigma)
    keys, mat_list = extract_adv_mat_frm_grads(args.grads, args.vad,
                                               args.ori_feats, args.sigma)
    adv_featsfile = 'ark:| copy-feats ark:- ark,scp:' + cpath + '/data/voxceleb1_test/spoofed_feats_sigma' + str(
        args.sigma
    ) + '.ark,' + cpath + '/data/voxceleb1_test/spoofed_feats_sigma' + str(
        args.sigma) + '.scp'
    utts_done = 0

    with kaldi_io.open_or_fd(adv_featsfile, 'wb') as f:
        for key, mat in zip(keys, mat_list):
            kaldi_io.write_mat(f, mat, key=key)
            utts_done += 1
    print('%d done.' % (utts_done))
Example #32
    def generate_z_ark(self):
        args = self.args

        c_dim = self.args.c_dim

        # init model
        if args.infer_epoch == -1:
            self.reload_checkpoint()
        else:
            ckpt_path = '{}/ckpt_epoch{}.pt'.format(args.ckpt_dir,
                                                    args.infer_epoch)
            assert os.path.exists(ckpt_path)
            checkpoint_dict = torch.load(ckpt_path, map_location=self.device)
            self.model.load_state_dict(checkpoint_dict['model'])
            print("successfully reload {} [model] to infer".format(ckpt_path))

        self.model.to(self.device)
        self.model.eval()

        # init data x
        dataset = feats_data_loader(npz_path=self.args.test_data_npz,
                                    dataset_name=self.args.dataset_name)
        utt_data = dataset.get_utt_data()

        ark_dir = args.ark_dir
        if not os.path.exists(ark_dir):
            os.makedirs(ark_dir)
        ark_path = args.ark_dir + os.sep + 'feats.ark'

        pbar = tqdm(total=len(utt_data))
        with open(ark_path, 'wb') as f:
            for utt, data in utt_data.items():
                data = np.array(data)
                data = torch.from_numpy(data)
                data = data.to(self.device)
                data, _ = self.model(data)
                data = data.cpu().detach().numpy()
                # split data
                data = data[:, :c_dim]
                kaldi_io.write_mat(f, data, utt)
                pbar.update(1)
                pbar.set_description('generate utter {} of frames {}'.format(
                    utt, data.shape[0]))
        pbar.close()
        print("successfully save kaldi ark in {}".format(ark_path))
    def testPipeReadWrite(self):
        """
        Test read/write for pipes.

        Note: make sure the "os.environ['KALDI_ROOT']" in "kaldi_io/kaldi_io.py" is correct.
        """
        # the following line disables 'stderr' forwarding, comment it for DEBUG,
        with open("/dev/null","w") as sys.stderr:
            # read,
            flt_mat4 = { k:m for k,m in kaldi_io.read_mat_ark('ark:copy-feats ark:tests/data/feats.ark ark:- |') }
            # write to pipe,
            with kaldi_io.open_or_fd('ark:| copy-feats ark:- ark:tests/data_re-saved/mat_pipe.ark','wb') as f:
                for k,m in flt_mat4.items(): kaldi_io.write_mat(f, m, k)
            # read it again and compare,
            for k,m in kaldi_io.read_mat_ark('tests/data_re-saved/mat_pipe.ark'):
                self.assertTrue(np.array_equal(m, flt_mat4[k]),"flt. matrix same after read/write via pipe")

            # read some other formats from pipe,
            i32_vec3 = { k:v for k,v in kaldi_io.read_vec_int_ark('ark:copy-int-vector ark:tests/data/ali.ark ark:- |') }
            flt_vec4 = { k:v for k,v in kaldi_io.read_vec_flt_ark('ark:copy-vector ark:tests/data/conf.ark ark:- |') }
Example #34
    if rnn == 1:
        inp = inp.view(inp.shape[0], 1, inp.shape[1])
        lab = lab.view(lab.shape[0], 1)
    beg_snt = data_end_index[i]

    [loss, err, pout] = net(inp, lab, test_flag)

    if multi_gpu:
        loss = loss.mean()
        err = err.mean()

    if do_forward:
        if rnn == 1:
            pout = pout.view(pout.shape[0] * pout.shape[1], pout.shape[2])
        kaldi_io.write_mat(
            post_file,
            pout.data.cpu().numpy() - np.log(counts / np.sum(counts)),
            data_name[i])

    if do_training:
        # free the gradient buffer
        optimizer.zero_grad()

        # gradient computation
        loss.backward()

        # gradient clipping
        # torch.nn.utils.clip_grad_norm(net.parameters(), 1)

        # updating parameters
        optimizer.step()