Example #1
def evaluate_deepmine_from_xvecs(ds_eval, outfolder='./exp/example_xvecs'):
    if not os.path.isfile(os.path.join(outfolder, 'xvector.scp')):
        xvec_scps = glob(os.path.join(outfolder, '*.scp'))
        assert len(xvec_scps) != 0, 'No xvector scps found'
        with open(os.path.join(outfolder, 'xvector.scp'), 'w+') as outfile:
            for fname in xvec_scps:
                with open(fname) as infile:
                    for line in infile:
                        outfile.write(line)

    xvec_dict = odict_from_2_col(os.path.join(outfolder, 'xvector.scp'))
    answer_col0 = []
    answer_col1 = []
    answer_col2 = []

    for i in tqdm(range(len(ds_eval))):
        model, enrol_utts, eval_utts = ds_eval.get_item_utts(i)
        answer_col0.append([model for _ in range(len(eval_utts))])
        answer_col1.append(eval_utts)

        model_embeds = np.array(
            [read_vec_flt(xvec_dict[u]) for u in enrol_utts])
        model_embed = np.mean(normalize(model_embeds, axis=1),
                              axis=0).reshape(1, -1)

        eval_embeds = np.array([read_vec_flt(xvec_dict[u]) for u in eval_utts])
        eval_embeds = normalize(eval_embeds, axis=1)

        scores = cosine_similarity(model_embed, eval_embeds).squeeze(0)
        assert len(scores) == len(eval_utts)
        answer_col2.append(scores)

    answer_col0 = np.concatenate(answer_col0)
    answer_col1 = np.concatenate(answer_col1)
    answer_col2 = np.concatenate(answer_col2)

    print('Writing results to file...')
    with open(os.path.join(outfolder, 'answer_full.txt'), 'w+') as fp:
        for m, ev, s in tqdm(zip(answer_col0, answer_col1, answer_col2)):
            line = '{} {} {}\n'.format(m, ev, s)
            fp.write(line)

    with open(os.path.join(outfolder, 'answer.txt'), 'w+') as fp:
        for s in tqdm(answer_col2):
            line = '{}\n'.format(s)
            fp.write(line)

    if (answer_col0 == np.array(ds_eval.models_eval)).all():
        print('model ordering matched')
    else:
        print('model ordering was not correct, need to fix before submission')

    if (answer_col1 == np.array(ds_eval.eval_utts)).all():
        print('eval utt ordering matched')
    else:
        print(
            'eval utt ordering was not correct, need to fix before submission')
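The scoring convention above is worth seeing in isolation: each enrollment x-vector is L2-normalized, the normalized vectors are averaged into a single model embedding, and each trial is scored by cosine similarity against the (normalized) test embeddings. A minimal, self-contained sketch with random arrays standing in for real x-vectors:

import numpy as np
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity

rng = np.random.RandomState(0)
enrol = rng.randn(3, 512)  # toy stand-ins for three enrollment x-vectors
test = rng.randn(5, 512)   # toy stand-ins for five test x-vectors

model = normalize(enrol, axis=1).mean(axis=0, keepdims=True)  # shape (1, 512)
scores = cosine_similarity(model, normalize(test, axis=1)).squeeze(0)
print(scores.shape)  # (5,)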
Example #2
    def __init__(self, args, dictionary):
        super().__init__(args)
        self.dictionary = dictionary
        self.feat_in_channels = args.feat_in_channels
        self.specaugment_config = args.specaugment_config
        self.num_targets = args.num_targets
        self.training_stage = hasattr(args, "valid_subset")

        # the following attributes are related to state_prior estimate
        self.initial_state_prior = None
        if args.initial_state_prior_file is not None:  # only relevant for Xent training, used in models
            self.initial_state_prior = kaldi_io.read_vec_flt(
                args.initial_state_prior_file)
            self.initial_state_prior = torch.from_numpy(
                self.initial_state_prior)
            assert self.initial_state_prior.size(0) == self.num_targets, \
                "length of initial_state_prior ({}) != num_targets ({})".format(
                    self.initial_state_prior.size(0), self.num_targets
                )
        self.state_prior_update_interval = args.state_prior_update_interval
        if self.state_prior_update_interval is None and self.initial_state_prior is not None:
            logger.info("state prior will not be updated during training")
        self.state_prior_update_smoothing = args.state_prior_update_smoothing
        self.averaged_state_post = None  # state posterior will be saved here before being committed as the new state prior

        # the following 4 options are for chunk-wise training/test (including Xent and LF-MMI)
        self.chunk_width = args.chunk_width
        self.chunk_left_context = args.chunk_left_context
        self.chunk_right_context = args.chunk_right_context
        self.label_delay = args.label_delay  # only for chunk-wise Xent training

        torch.backends.cudnn.deterministic = True
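Example #3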
    def __init__(self, cfg: SpeechRecognitionHybridConfig, dictionary,
                 feat_dim):
        super().__init__(cfg)
        self.dictionary = dictionary
        self.feat_dim = feat_dim
        self.feat_in_channels = cfg.feat_in_channels
        self.num_targets = cfg.num_targets
        self.training_stage = (cfg.max_epoch > 0)  # a hack

        # the following attributes are related to state_prior estimate
        self.initial_state_prior = None
        if cfg.initial_state_prior_file is not None:  # only relevant for Xent training, used in models
            self.initial_state_prior = kaldi_io.read_vec_flt(
                cfg.initial_state_prior_file)
            self.initial_state_prior = torch.from_numpy(
                self.initial_state_prior)
            assert (
                self.initial_state_prior.size(0) == self.num_targets
            ), "length of initial_state_prior ({}) != num_targets ({})".format(
                self.initial_state_prior.size(0), self.num_targets)
        self.state_prior_update_interval = cfg.state_prior_update_interval
        if self.state_prior_update_interval is None and self.initial_state_prior is not None:
            logger.info("state prior will not be updated during training")
        self.state_prior_update_smoothing = cfg.state_prior_update_smoothing
        self.averaged_state_post = None  # state posterior will be saved here before being committed as the new state prior

        # the following 4 options are for chunk-wise training/test (including Xent and LF-MMI)
        self.chunk_width = cfg.chunk_width
        self.chunk_left_context = cfg.chunk_left_context
        self.chunk_right_context = cfg.chunk_right_context
        self.label_delay = cfg.label_delay  # only for chunk-wise Xent training

        torch.backends.cudnn.deterministic = True
Example #4
def fetch_llkprob_segment(wavid,
                          ipath2prob_scp,
                          seg=(0.0, math.inf),
                          win_len=0.025,
                          hop_len=0.010):
    """
    given wavid, return an loglikehood probability segment from ipath2prob_scp

    args: wavid       -- string, id of a audio file
          ipath2prob_scp -- the path to llk_prob.scp
                            each wavid corresponds to a float vector of llk_prob
                            llk_prob: the prob of a specific GMM generating a frame
          seg            -- a tuple of (start_time, end_time)
          win_len -- window length in second
          hop_len -- window shift in second

    return: vec        -- llk_prob curve with numpy format
  """
    fd = kaldi_io.open_or_fd(ipath2prob_scp)
    for line in fd:
        (wid, path) = line.decode("utf-8").rstrip().split(' ', 1)
        if wavid == wid:
            vec = kaldi_io.read_vec_flt(path)  # np.array
            start_t, end_t = seg
            end_t = min(end_t, vec.shape[0] *
                        hop_len)  # the second term is float by default
            assert start_t < end_t and start_t >= 0.0, "InputArg: seg {0} invalid".format(
                str(seg))
            start_f = int(start_t / hop_len)
            end_f = int(end_t / hop_len)
            return vec[start_f:end_f]
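A hypothetical call, for orientation (the wav id and scp path below are placeholders): with the default 10 ms hop, a (0.5, 2.5) segment slices frames int(0.5/0.010)=50 up to int(2.5/0.010)=250, i.e. 200 frames.

# hypothetical usage; 'utt_001' and 'llk_prob.scp' are placeholders
curve = fetch_llkprob_segment('utt_001', 'llk_prob.scp', seg=(0.5, 2.5))
# curve is vec[50:250]; the function returns None if 'utt_001' is not in the scp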
Example #5
def load_phn(f):
    #files           = [train_scp_info['utt2file'][uu] for uu in u]
    vf = fid2vadfile[p_aug_remove.sub('', f)]  # File IDs
    vad = kaldi_io.read_vec_flt(vf)
    if phn_vad_scp != "None":
        lab = np.genfromtxt(phn_dir + f + ".hp")[np.where(vad == 1)]
    else:
        lab = np.genfromtxt(phn_dir + f + ".hp")
    lab = lab.reshape(1, -1)
    return lab
Example #6
def load_kaldi_feats_segm_same_dur_plus_lab(rng,
                                            files,
                                            min_length,
                                            max_length,
                                            n_avl_samp,
                                            lab_dir,
                                            f_ids,
                                            vad_files,
                                            start_from_zero=False):

    min_n_avl_samp = np.min(n_avl_samp)
    # Add 1 because rng.randint samples from the half-open interval
    # [min_length, max_len), i.e. max_len itself is excluded.
    max_len = np.min([min_n_avl_samp + 1, max_length])
    n_sel_samp = rng.randint(min_length, max_len)
    start = []
    end = []

    vad = [kaldi_io.read_vec_flt(vf).astype(bool) for vf in vad_files]
    lab = [
        np.genfromtxt(lab_dir + f + ".hp")[vad[i]] for i, f in enumerate(f_ids)
    ]

    assert (len(lab[0]) == n_avl_samp[0])

    for i, f in enumerate(files):
        # The start_from_zero option is mainly for debugging/development
        if start_from_zero:
            start.append(0)
        else:
            last_possible_start = n_avl_samp[i] - n_sel_samp
            # randint's interval is [0, last_possible_start + 1) = [0, last_possible_start]
            start.append(rng.randint(0, last_possible_start + 1))
        end.append(start[-1] + n_sel_samp)
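    # "xxx" below is a dummy utterance key; the bracketed suffix is a Kaldi
    # range specifier, so read_mat_scp loads only the selected rows of each matrix.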
    ff = [
        "xxx {}[{}:{},:]".format(files[i], start[i], end[i])
        for i in range(len(files))
    ]
    data = [rr[1] for rr in kaldi_io.read_mat_scp(ff)]
    data = np.stack(data, axis=0)

    lab = np.array([l[start[i]:end[i]] for i, l in enumerate(lab)])

    return data, lab
Example #7
def _main(args, output_file):
    logging.basicConfig(
        format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=logging.INFO,
        stream=output_file,
    )
    logger = logging.getLogger("espresso.dump_posteriors")

    print_options_meaning_changes(args, logger)

    utils.import_user_module(args)

    if args.max_tokens is None and args.max_sentences is None:
        args.max_tokens = 12000
    logger.info(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load dataset split
    task = tasks.setup_task(args)
    task.load_dataset(args.gen_subset)

    # Load ensemble
    logger.info("loading model(s) from {}".format(args.path))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args.path),
        arg_overrides=eval(args.model_overrides),
        task=task,
        suffix=getattr(args, "checkpoint_suffix", ""),
    )

    # Load state prior for cross-entropy trained systems decoding
    if args.state_prior_file is not None:
        prior = torch.from_numpy(kaldi_io.read_vec_flt(args.state_prior_file))
    else:
        prior = []

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_()
        if args.fp16:
            model.half()
        if use_cuda:
            model.cuda()
        if isinstance(prior, list) and getattr(model, "state_prior",
                                               None) is not None:
            prior.append(model.state_prior.unsqueeze(0))

    if isinstance(prior, list) and len(prior) > 0:
        prior = torch.cat(prior, 0).mean(0)  # average priors across models
        prior = prior / prior.sum()  # re-normalize
    elif isinstance(prior, list):
        prior = None

    if prior is not None:
        if args.fp16:
            prior = prior.half()
        if use_cuda:
            prior = prior.cuda()
        log_prior = prior.log()
    else:
        log_prior = None

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args.gen_subset),
        max_tokens=args.max_tokens,
        max_sentences=args.max_sentences,
        max_positions=utils.resolve_max_positions(
            task.max_positions(), *[
                model.max_positions() if hasattr(model, "encoder") else
                (None, model.max_positions()) for model in models
            ]),
        ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
        required_batch_size_multiple=args.required_batch_size_multiple,
        num_shards=args.num_shards,
        shard_id=args.shard_id,
        num_workers=args.num_workers,
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=args.log_format,
        log_interval=args.log_interval,
        default_log_format=("tqdm" if not args.no_progress_bar else "none"),
    )

    # Initialize generator
    gen_timer = StopwatchMeter()
    generator = task.build_generator(models, args)

    # Generate and dump
    num_sentences = 0
    chunk_width = getattr(task, "chunk_width", None)
    lprobs_wspecifier = "ark:| copy-matrix ark:- ark:-"
    with kaldi_io.open_or_fd(lprobs_wspecifier, "wb") as f:
        if chunk_width is None:  # normal dumping (i.e., no chunking)
            for sample in progress:
                sample = utils.move_to_cuda(sample) if use_cuda else sample
                if "net_input" not in sample:
                    continue

                gen_timer.start()
                lprobs, padding_mask = task.inference_step(
                    generator, models, sample)
                if log_prior is not None:
                    assert lprobs.size(-1) == log_prior.size(0)
                    lprobs = lprobs - log_prior
                out_lengths = (~padding_mask).long().sum(
                    dim=1).cpu() if padding_mask is not None else None
                num_processed_frames = sample["ntokens"]
                gen_timer.stop(num_processed_frames)
                num_sentences += sample["nsentences"]

                if out_lengths is not None:
                    for i in range(sample["nsentences"]):
                        length = out_lengths[i]
                        kaldi_io.write_mat(f,
                                           lprobs[i, :length, :].cpu().numpy(),
                                           key=sample["utt_id"][i])
                else:
                    for i in range(sample["nsentences"]):
                        kaldi_io.write_mat(f,
                                           lprobs[i, :, :].cpu().numpy(),
                                           key=sample["utt_id"][i])
        else:  # dumping chunks within the same utterance from left to right
            for sample in progress:  # sample is actually a list of batches
                sample = utils.move_to_cuda(sample) if use_cuda else sample
                utt_id = sample[0]["utt_id"]
                id = sample[0]["id"]
                whole_lprobs = None
                for i, chunk_sample in enumerate(sample):
                    if "net_input" not in chunk_sample:
                        continue

                    assert chunk_sample["utt_id"] == utt_id and (
                        chunk_sample["id"] == id).all()
                    gen_timer.start()
                    lprobs, _ = task.inference_step(generator, models,
                                                    chunk_sample)
                    if log_prior is not None:
                        assert lprobs.size(-1) == log_prior.size(0)
                        lprobs = lprobs - log_prior
                    if whole_lprobs is None:
                        whole_lprobs = lprobs.cpu()
                    else:
                        whole_lprobs = torch.cat((whole_lprobs, lprobs.cpu()),
                                                 1)
                    num_processed_frames = chunk_sample["ntokens"]
                    gen_timer.stop(num_processed_frames)

                    if i == len(sample) - 1:
                        num_sentences += len(utt_id)
                        for j in range(len(utt_id)):
                            truncated_length = models[0].output_lengths(
                                task.dataset(args.gen_subset).src_sizes[id[j]]
                            )  # length is after possible subsampling by the model
                            mat = whole_lprobs[j, :truncated_length, :]
                            kaldi_io.write_mat(f, mat.numpy(), key=utt_id[j])

    logger.info(
        "Dumped {} utterances ({} frames) in {:.1f}s ({:.2f} sentences/s, {:.2f} frames/s)"
        .format(num_sentences, gen_timer.n, gen_timer.sum,
                num_sentences / gen_timer.sum, 1. / gen_timer.avg))

    return
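Example #8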
from os.path import join
import sys
import kaldi_io
import numpy as np

from sklearn.neighbors import KNeighborsClassifier

args = sys.argv

feat_file = join(args[1], 'xvector.scp')

with open(feat_file) as f:
    lines = f.read().splitlines()
    npts = len(lines)
    test_x = kaldi_io.read_vec_flt(lines[0].split()[1])
    fdim = test_x.shape[0]
    
    X = np.zeros((npts, fdim))
    y = []
    for idx, line in enumerate(lines):
        sp = line.split()
        X[idx, :] = kaldi_io.read_vec_flt(sp[1])
        # male/female is present in uttname
        y.append(sp[0].split('-')[2].split('_')[0])

neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X, y) 

print(neigh.score(X, y))
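Note that neigh.score(X, y) evaluates on the same utterances the classifier was fit on, so it reports training fit rather than generalization. A sketch of a more honest check, assuming the X and y arrays built above:

from sklearn.model_selection import train_test_split

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_tr, y_tr)
print(neigh.score(X_te, y_te))  # accuracy on held-out utterances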
Example #9
      '{}/xvectors_sre/xvector_fullpaths.scp'.format(xvectors_base_path),
      '{}/xvectors_mx6/xvector_fullpaths.scp'.format(xvectors_base_path),
      '{}/xvectors_sre16_eval_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path),
      '{}/xvectors_sre16_eval_test/xvector_fullpaths.scp'.format(xvectors_base_path),
      '{}/xvectors_sre16_eval_enrollment/spk_xvector.scp'.format(xvectors_base_path),
      '{}/xvectors_sre18_dev_enrollment/spk_xvector.scp'.format(xvectors_base_path),
      '{}/xvectors_sre18_dev_test/xvector_fullpaths.scp'.format(xvectors_base_path),
      '{}/xvectors_sre18_dev_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path),
      '{}/xvectors_sre18_eval_test/xvector_fullpaths.scp'.format(xvectors_base_path),
      '{}/xvectors_sre18_eval_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path),
      '{}/xvectors_sre18_eval_enrollment/spk_xvector.scp'.format(xvectors_base_path),
      '{}/xvectors_sre19_eval_test/xvector_fullpaths.scp'.format(xvectors_base_path),
      '{}/xvectors_sre19_eval_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path),
      '{}/xvectors_sre19_eval_enrollment/spk_xvector.scp'.format(xvectors_base_path)])
 
mega_scp_dict = {}
mega_xvec_dict = {}
for fx in xvector_scp_list:
    subprocess.call(['sed', '-i', 's| exp/xvector_nnet_1a| {}|g'.format(xvectors_base_path), fx])
    with open(fx) as f:
        scp_list = f.readlines()
    scp_dict = {x.split(' ', 1)[0]: x.rstrip('\n').split(' ', 1)[1] for x in scp_list}
    xvec_dict = {x.split(' ', 1)[0]: kaldi_io.read_vec_flt(x.rstrip('\n').split(' ', 1)[1]) for x in scp_list}
    mega_scp_dict.update(scp_dict)
    mega_xvec_dict.update(xvec_dict)

mega_scp = np.c_[np.asarray(list(mega_scp_dict.keys()))[:, np.newaxis], np.asarray(list(mega_scp_dict.values()))]

np.savetxt('xvectors/mega_xvector_voxceleb_8k.scp', mega_scp, fmt='%s', delimiter=' ', comments='')

pickle.dump(mega_xvec_dict, open('xvectors/mega_xvector_voxceleb_8k.pkl', 'wb'))
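Example #10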
        # mega_scp_dict = {}
        mega_xvec_dict = pickle.load(
            open('xvectors/mega_xvector_voices_voxceleb_16k.pkl', 'rb'))
        for fx in xvector_scp_list:
            subprocess.call([
                'sed', '-i', 's| {}| {}|g'.format(xv_path, xvectors_base_path),
                fx
            ])
            with open(fx) as f:
                scp_list = f.readlines()
            scp_dict = {
                os.path.splitext(os.path.basename(x.split(' ', 1)[0]))[0]:
                x.rstrip('\n').split(' ', 1)[1]
                for x in scp_list
            }
            xvec_dict = {
                os.path.splitext(os.path.basename(x.split(' ', 1)[0]))[0]:
                kaldi_io.read_vec_flt(x.rstrip('\n').split(' ', 1)[1])
                for x in scp_list
            }
            # mega_scp_dict.update(scp_dict)
            mega_xvec_dict.update(xvec_dict)

        # mega_scp = np.c_[np.asarray(list(mega_scp_dict.keys()))[:,np.newaxis], np.asarray(list(mega_scp_dict.values()))]

        # np.savetxt('xvectors/mega_xvector_voices_voxceleb_16k.scp', mega_scp, fmt='%s', delimiter=' ', comments='')

        pickle.dump(
            mega_xvec_dict,
            open('xvectors/mega_xvector_voices_voxceleb_16k.pkl', 'wb'))
Example #11
def __getitem__(self, idx):
    # feat = kaldi_io.read_mat(self.feat_list[idx])  # reading MFCCs/fbanks...
    feat = kaldi_io.read_vec_flt(self.feat_list[idx])  # reading xvecs
    return feat
Example #12
print("Missing features number train: ",
      file_len(utt2spk_train_path) - len(fea_train))
print("Missing features number dev: ",
      file_len(utt2spk_dev_path) - len(fea_dev))
print("Missing features number eval: ",
      file_len(utt2spk_eval_path) - len(fea_eval))

if suffix:
    fea_train_spk = {
        k: m[0]
        for k, m in kaldi_io.read_mat_scp(replacement_xvectors_path +
                                          '/lda_spk_xvector_mat.scp')
    }
    replacement = fea_train_spk[replacement_key]
    print("Replacing with mean for class", replacement_key)
else:
    replacement = kaldi_io.read_vec_flt(replacement_xvectors_path +
                                        '/lda_mean.vec')
    print("Replacing with mean all speaker xvector.")

########## replace missing xvectors ##############

for utt in utts_dev:
    if utt not in fea_dev:
        fea_dev[utt] = replacement

for utt in utts_eval:
    if utt not in fea_eval:
        fea_eval[utt] = replacement

for utt in utts_train:
    if utt not in fea_train:
        fea_train[utt] = replacement
Example #13
                fc += 1
                spk2gender[sp[0]] = 'f'
            else:
                continue
        else:
            if mc < MAX_MALE:
                mc += 1
                spk2gender[sp[0]] = 'm'
            else:
                continue

        spk2utt[sp[0]] = utts

        spk_feats = []
        for u in utts:
            utt_feat = kaldi_io.read_vec_flt(feats[u])
            utt_feat = utt_feat[np.newaxis, :]
            #print(utt_feat.shape)
            spk_feats.append(utt_feat)
        spk_feats = np.array(spk_feats)
        spk_feats = spk_feats.squeeze()

        spk2featlen[sp[0]] = spk_feats.shape[0]
        print(spk_feats.shape)
        X.append(spk_feats)

nspk = len(spk2gender.keys())
print("Number of speakers", nspk)

labels = []
print("creating labels for silhouette score...")
Example #14
# read utterance embeddings
with open(sys.argv[1], 'r') as f:
    content = f.readlines()
content = [x.strip() for x in content]

# speaker to utterances mapping
spk2mat = defaultdict(list)
for line in content:
    (key, rxfile) = line.split()
    spk = key.split('-')[0]
    if spk in dev_test_spk.keys():
        uttid = key.split('-')[1] + '_' + key.split('-')[2]
        if uttid not in dev_test_spk[spk]:
            continue
    spk2mat[spk].append(read_vec_flt(rxfile))

#for i in spk2mat.keys():
#    if i in dev_test_spk.keys():
#        print(len(spk2mat[i]))

# create speaker embeddings
out_file = sys.argv[2]
ark_scp_output = 'ark:| copy-vector ark:- ark,scp:' + out_file + '.ark,' + out_file + '.scp'
with open_or_fd(ark_scp_output, 'wb') as f:
    for spk, mat in spk2mat.items():
        spk_emb = np.mean(mat, axis=0).reshape(-1)  # get speaker embedding (vector)
        #print(spk_emb.shape)
        #print(spk)
        # assumed final step (the snippet is truncated here): write the averaged
        # embedding to the ark/scp opened above
        write_vec_flt(f, spk_emb, key=spk)
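Example #15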
def zscore_normalization(x, mean, std):
    x = (x - mean) / (std + args.eps)
    return x


trial_utts = set()
trial_spks = set()
with open(args.trials, 'r') as fptr:
    for line in fptr:
        enroll_spk, eval_utt, target = line.strip().split()
        trial_utts.add(eval_utt)
        trial_spks.add(enroll_spk)

# Read mean, enroll embedding and eval embedding
mean = kaldi_io.read_vec_flt(args.mean_file)
enroll_spks, enroll_feats = read_target_vector(args.enroll_scp, trial_spks)
eval_utts, eval_feats = read_target_vector(args.eval_scp, trial_utts)
if args.impostor_scp != '':
    impostor_feats = read_impostor_vector(args.impostor_scp)
    impostor_feats = np.array(impostor_feats, dtype=np.float32)
    impostor_feats = impostor_feats - mean
    impostor_feats = preprocessing.normalize(impostor_feats, norm='l2')
# Convert data to numpy
enroll_feats = np.array(enroll_feats, dtype=np.float32)
eval_feats = np.array(eval_feats, dtype=np.float32)
enroll_spks = np.array(enroll_spks)
eval_utts = np.array(eval_utts)

# Subtract the mean from the enroll and eval embeddings
enroll_feats = enroll_feats - mean
Example #16
    cwd = os.getcwd()

    os.system('bash ' + cwd + '/embedding_extraction.sh ' + kaldiDir + ' ' + wavFile + ' ' + rttmFile)

    ############################################
    ## Convert xvector.scp to a numpy matrix
    ############################################

    data = []  # np.array([])
    with open(cwd + '/tmpkaldidir/xvectors/xvector.scp', 'r') as file:
        x = file.readlines()
    d = np.empty((len(x), 512))
    for i in range(0, len(x)):
        a = x[i]
        d[i, :] = kaldi_io.read_vec_flt(a.strip().split()[1])
    data.append(d)
    data = np.concatenate(data)   ## x-vectors of a particular session
    np.save('data.npy', data)

    ####################################################################################################################
    #                                  Latent embeddings extraction
    ####################################################################################################################

    timestamp = np.load(timestamp + '.npy')   # Load the timestamp of the saved trained model

    for j in range(3, len(timestamp)):  # Evaluate for the saved models (20k, 25k, 30k)
        timestamp1 = timestamp[j]   # Timestamp of the saved model

        latent = recon_enc(timestamp1, sampler, z_dim, beta_cycle_label, beta_cycle_gen, data, batch_size)  # model load and prediction
Example #17
async def async_read_xvec(path):
    return read_vec_flt(path)
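Note that wrapping a blocking call in async def does not make it non-blocking: read_vec_flt still runs synchronously on the event loop. A sketch that actually overlaps several reads, assuming Python 3.9+ (for asyncio.to_thread) and placeholder paths:

import asyncio
from kaldi_io import read_vec_flt

async def async_read_xvec(path):
    # run the blocking kaldi_io read in a worker thread
    return await asyncio.to_thread(read_vec_flt, path)

async def read_many(paths):
    return await asyncio.gather(*(async_read_xvec(p) for p in paths))

# hypothetical usage:
# vecs = asyncio.run(read_many(['xvec.ark:12', 'xvec.ark:345']))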
Example #18
def _main(cfg, output_file):
    logging.basicConfig(
        format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=os.environ.get("LOGLEVEL", "INFO").upper(),
        stream=output_file,
    )
    logger = logging.getLogger("espresso.dump_posteriors")

    print_options_meaning_changes(cfg, logger)

    utils.import_user_module(cfg.common)

    if cfg.dataset.max_tokens is None and cfg.dataset.batch_size is None:
        cfg.dataset.max_tokens = 12000
    logger.info(cfg)

    # Fix seed for stochastic decoding
    if cfg.common.seed is not None and not cfg.generation.no_seed_provided:
        np.random.seed(cfg.common.seed)
        utils.set_torch_seed(cfg.common.seed)

    use_cuda = torch.cuda.is_available() and not cfg.common.cpu

    task = tasks.setup_task(cfg.task)

    overrides = ast.literal_eval(cfg.common_eval.model_overrides)

    # Load ensemble
    logger.info("loading model(s) from {}".format(cfg.common_eval.path))
    models, saved_cfg = checkpoint_utils.load_model_ensemble(
        utils.split_paths(cfg.common_eval.path),
        arg_overrides=overrides,
        task=task,
        suffix=cfg.checkpoint.checkpoint_suffix,
        strict=(cfg.checkpoint.checkpoint_shard_count == 1),
        num_shards=cfg.checkpoint.checkpoint_shard_count,
    )

    # loading the dataset should happen after the checkpoint has been loaded so we can give it the saved task config
    task.load_dataset(cfg.dataset.gen_subset, task_cfg=saved_cfg.task)

    # Load state prior for cross-entropy trained systems decoding
    if cfg.generation.state_prior_file is not None:
        prior = torch.from_numpy(
            kaldi_io.read_vec_flt(cfg.generation.state_prior_file))
    else:
        prior = []

    # Optimize ensemble for generation
    for model in models:
        if model is None:
            continue
        if cfg.common.fp16:
            model.half()
        if use_cuda and not cfg.distributed_training.pipeline_model_parallel:
            model.cuda()
        model.prepare_for_inference_(cfg)
        if isinstance(prior, list) and getattr(model, "state_prior",
                                               None) is not None:
            prior.append(model.state_prior.unsqueeze(0))

    if isinstance(prior, list) and len(prior) > 0:
        prior = torch.cat(prior, 0).mean(0)  # average priors across models
        prior = prior / prior.sum()  # re-normalize
    elif isinstance(prior, list):
        prior = None

    if prior is not None:
        if cfg.common.fp16:
            prior = prior.half()
        if use_cuda:
            prior = prior.cuda()
        log_prior = prior.log()
    else:
        log_prior = None

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(cfg.dataset.gen_subset),
        max_tokens=cfg.dataset.max_tokens,
        max_sentences=cfg.dataset.batch_size,
        max_positions=utils.resolve_max_positions(
            task.max_positions(), *[m.max_positions() for m in models]),
        ignore_invalid_inputs=cfg.dataset.skip_invalid_size_inputs_valid_test,
        required_batch_size_multiple=cfg.dataset.required_batch_size_multiple,
        seed=cfg.common.seed,
        num_shards=cfg.distributed_training.distributed_world_size,
        shard_id=cfg.distributed_training.distributed_rank,
        num_workers=cfg.dataset.num_workers,
        data_buffer_size=cfg.dataset.data_buffer_size,
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=cfg.common.log_format,
        log_interval=cfg.common.log_interval,
        default_log_format=("tqdm"
                            if not cfg.common.no_progress_bar else "simple"),
    )

    # Initialize generator
    gen_timer = StopwatchMeter()
    generator = task.build_generator(models, cfg.generation)

    # Generate and dump
    num_sentences = 0
    chunk_width = getattr(task, "chunk_width", None)
    lprobs_wspecifier = "ark:| copy-matrix ark:- ark:-"
    with kaldi_io.open_or_fd(lprobs_wspecifier, "wb") as f:
        if chunk_width is None:  # normal dumping (i.e., no chunking)
            for sample in progress:
                sample = utils.move_to_cuda(sample) if use_cuda else sample
                if "net_input" not in sample:
                    continue

                gen_timer.start()
                lprobs, padding_mask = task.inference_step(
                    generator, models, sample)
                if log_prior is not None:
                    assert lprobs.size(-1) == log_prior.size(0)
                    lprobs = lprobs - log_prior
                out_lengths = ((~padding_mask).long().sum(
                    dim=1).cpu() if padding_mask is not None else None)
                num_processed_frames = sample["ntokens"]
                gen_timer.stop(num_processed_frames)
                num_sentences += (sample["nsentences"] if "nsentences"
                                  in sample else sample["id"].numel())

                if out_lengths is not None:
                    for i in range(sample["nsentences"]):
                        length = out_lengths[i]
                        kaldi_io.write_mat(
                            f,
                            lprobs[i, :length, :].cpu().numpy(),
                            key=sample["utt_id"][i],
                        )
                else:
                    for i in range(sample["nsentences"]):
                        kaldi_io.write_mat(f,
                                           lprobs[i, :, :].cpu().numpy(),
                                           key=sample["utt_id"][i])
        else:  # dumping chunks within the same utterance from left to right
            for sample in progress:  # sample is actually a list of batches
                sample = utils.move_to_cuda(sample) if use_cuda else sample
                utt_id = sample[0]["utt_id"]
                id = sample[0]["id"]
                whole_lprobs = None
                for i, chunk_sample in enumerate(sample):
                    if "net_input" not in chunk_sample:
                        continue

                    assert (chunk_sample["utt_id"] == utt_id
                            and (chunk_sample["id"] == id).all())
                    gen_timer.start()
                    lprobs, _ = task.inference_step(generator, models,
                                                    chunk_sample)
                    if log_prior is not None:
                        assert lprobs.size(-1) == log_prior.size(0)
                        lprobs = lprobs - log_prior
                    if whole_lprobs is None:
                        whole_lprobs = lprobs.cpu()
                    else:
                        whole_lprobs = torch.cat((whole_lprobs, lprobs.cpu()),
                                                 1)
                    num_processed_frames = chunk_sample["ntokens"]
                    gen_timer.stop(num_processed_frames)

                    if i == len(sample) - 1:
                        num_sentences += len(utt_id)
                        for j in range(len(utt_id)):
                            truncated_length = models[0].output_lengths(
                                task.dataset(
                                    cfg.dataset.gen_subset).src_sizes[id[j]]
                            )  # length is after possible subsampling by the model
                            mat = whole_lprobs[j, :truncated_length, :]
                            kaldi_io.write_mat(f, mat.numpy(), key=utt_id[j])

    logger.info(
        "Dumped {:,} utterances ({} frames) in {:.1f}s ({:.2f} sentences/s, {:.2f} frames/s)"
        .format(
            num_sentences,
            gen_timer.n,
            gen_timer.sum,
            num_sentences / gen_timer.sum,
            1.0 / gen_timer.avg,
        ))

    return
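Example #19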
lda_dim = int(
    sys.argv[12]
)  # For VB-HMM, x-vectors are reduced to this dimensionality using LDA
Fa = float(
    sys.argv[13])  # Parameter of VB-HMM (see VB_diarization.VB_diarization)
Fb = float(
    sys.argv[14])  # Parameter of VB-HMM (see VB_diarization.VB_diarization)
LoopP = float(
    sys.argv[15])  # Parameter of VB-HMM (see VB_diarization.VB_diarization)
use_VB = True  # False for using only AHC

frm_shift = 0.01  # frame rate of MFCC features

glob_tran = kaldi_io.read_mat(
    tran_mat_file)  # x-vector whitening transformation
glob_mean = kaldi_io.read_vec_flt(mean_vec_file)  # x-vector centering vector
kaldi_plda_train = kaldi_io.read_plda(plda_file)  # out-of-domain PLDA model
kaldi_plda_adapt = kaldi_io.read_plda(
    plda_adapt_file)  # in-domain "adaptation" PLDA model
segs_dict = read_xvector_timing_dict(
    segments_file)  # segments file with x-vector timing information

plda_train_mu, plda_train_tr, plda_train_psi = kaldi_plda_train
plda_adapt_mu, plda_adapt_tr, plda_adapt_psi = kaldi_plda_adapt

# Interpolate across-class, within-class and means of the two PLDA models with interpolation factor "alpha"
plda_mu = alpha * plda_train_mu + (1.0 - alpha) * plda_adapt_mu
W_train = np.linalg.inv(plda_train_tr.T.dot(plda_train_tr))
B_train = np.linalg.inv((plda_train_tr.T / plda_train_psi).dot(plda_train_tr))
W_adapt = np.linalg.inv(plda_adapt_tr.T.dot(plda_adapt_tr))
B_adapt = np.linalg.inv((plda_adapt_tr.T / plda_adapt_psi).dot(plda_adapt_tr))
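The snippet breaks off before the combination step announced in the comment above; a plausible continuation (an assumption, not part of the original) interpolates the two covariance matrices with the same factor alpha used for the means:

# assumed continuation: blend within-class (W) and across-class (B) covariances
W = alpha * W_train + (1.0 - alpha) * W_adapt
B = alpha * B_train + (1.0 - alpha) * B_adapt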
Example #20
def read_xvec(file):
    return kaldi_io.read_vec_flt(file)
Example #21
fd = open_or_fd(feats_path)
feats = {}
try:
    for line in fd:
        key, rxfile = line.decode().split(' ')
        feats[key] = read_mat(rxfile)
finally:
    if fd is not feats_path: fd.close()

vad_path = '/home/abbas/abbas/workspace/data/sre16/v2/data/iberspeech_dev2/vad.scp'
fd = open_or_fd(vad_path)
vads = {}
try:
    for line in fd:
        key, rxfile = line.decode().split(' ')
        vads[key] = read_vec_flt(rxfile).astype(bool)
finally:
    if fd is not vad_path: fd.close()

print('diarizing the test dataset ...')
hypothesis = {}
metric = DiarizationErrorRate(collar=0.250, skip_overlap=True)
for f in tqdm(feats):
    fname = f[5:-2]

    if fname == "LN24H-20151125":
        continue

    spkpath = 'dev2/hyp/' + fname + '.rttm'

    lab = np.loadtxt('dev2/spk/' + fname + '.spk',
Example #22
def read_xvec(file):
    return read_vec_flt(file)