def _init_ivectors(self):
    """ Add i-vectors if applicable. """
    args = self.args
    self.ivectors = None
    if args.ivectors is not None:
        io.log('Loading i-vectors from {}'.format(args.ivectors))
        self.ivectors = ivector_ark_read(args.ivectors)
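# `ivector_ark_read` is provided elsewhere in this codebase. Below is a
# minimal sketch of an equivalent reader, assuming the ark is in Kaldi text
# format (one `utt-id  [ v1 v2 ... vN ]` entry per line); the `_sketch` name
# and the `dtype`/`ordered` defaults are assumptions, not the actual API.
from collections import OrderedDict

import numpy as np


def ivector_ark_read_sketch(fname, dtype=np.float32, ordered=True):
    ivectors = OrderedDict() if ordered else {}
    with open(fname) as f:
        for line in f:
            parts = line.strip().split()
            if not parts:
                continue
            key = parts[0]
            # Drop the '[' and ']' tokens, keep only the numbers
            vals = [p for p in parts[1:] if p not in ('[', ']')]
            ivectors[key] = np.asarray(vals, dtype=dtype)
    return ivectors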
def main():
    import argparse
    desc = 'Perform sanity check for i-vector grouping'
    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('ivector_ark', help='i-vectors ark file')
    parser.add_argument('ivec2group', help='i-vector to group mapping')
    parser.add_argument('train_keys', help='Keys for training set')
    parser.add_argument('--cov-type', default='diag',
                        help='GMM covariance type (full|tied|diag|spherical)')
    parser.add_argument('--ignore', nargs='+', default=[],
                        help='Ignore these groups')
    parser.add_argument('--test-keys', help='Keys for test set. If not ' +
                        'specified, treat keys not in training set as test.')
    args = parser.parse_args()

    io.log('Reading i-vector ark from {}'.format(args.ivector_ark))
    ivectors = ivector_ark_read(args.ivector_ark)
    io.log('Reading i-vector grouping from {}'.format(args.ivec2group))
    ivec2group = io.dict_read(args.ivec2group)
    io.log('Ignore list: {}'.format(args.ignore))
    all_keys = [x for x in ivec2group if ivec2group[x] not in args.ignore]
    io.log('Reading training keys from {}'.format(args.train_keys))
    train_keys = set(io.read_lines(args.train_keys))
    test_keys = None
    if args.test_keys is not None:
        io.log('Reading test keys from {}'.format(args.test_keys))
        test_keys = set(io.read_lines(args.test_keys))

    # Split keys that actually have i-vectors into train and test mappings
    train_ivec2group, test_ivec2group = OrderedDict(), OrderedDict()
    for k in all_keys:
        if k in ivectors:
            if k in train_keys:
                train_ivec2group[k] = ivec2group[k]
            elif test_keys is None or k in test_keys:
                test_ivec2group[k] = ivec2group[k]
    test_keys = test_ivec2group.keys()
    # Report the counts actually used (keys without i-vectors were dropped)
    io.log('Train: {}, Test: {}'.format(len(train_ivec2group), len(test_keys)))
    train_group2ivecs = common.make_reverse_index(train_ivec2group)
    io.log('GMM covariance type: {}'.format(args.cov_type))

    # Fit GMM and do prediction
    corr_lbls = map(lambda x: test_ivec2group[x], test_keys)
    pred_lbls = run(ivectors, train_group2ivecs, test_keys, args.cov_type)

    # Report results
    acc = 100 * accuracy_score(corr_lbls, pred_lbls)
    print 'Overall accuracy: {:.2f} (%)'.format(acc)
    group_accs, group_names = comp_UARs(corr_lbls, pred_lbls)
    print 'Mean per-group accuracy: {:.2f} (%)'.format(100 * np.mean(group_accs))
    print 'Individual group accuracies:'
    for group_acc, group_name in zip(group_accs, group_names):
        print '\t{} - {:.2f} (%)'.format(group_name, 100 * group_acc)
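# `comp_UARs` is defined elsewhere; the reporting code above zips its two
# return values together. A minimal sketch under that reading: per-group
# recall (accuracy restricted to each true group), whose mean is the
# unweighted average recall (UAR). The `_sketch` name is hypothetical.
def comp_UARs_sketch(corr_lbls, pred_lbls):
    group_names = sorted(set(corr_lbls))
    group_accs = []
    for group in group_names:
        idxs = [i for i, lbl in enumerate(corr_lbls) if lbl == group]
        n_corr = sum(1 for i in idxs if pred_lbls[i] == group)
        group_accs.append(float(n_corr) / len(idxs))
    return group_accs, group_names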
def main():
    import argparse
    desc = 'Perform sanity check for i-vector grouping'
    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('ivector_ark', help='i-vectors ark file')
    parser.add_argument('ivec2group', help='i-vector to group mapping')
    parser.add_argument('ivec2spk', help='i-vector to speaker mapping')
    parser.add_argument('--cov-type', default='diag',
                        help='GMM covariance type (full|tied|diag|spherical)')
    args = parser.parse_args()

    io.log('Reading i-vector ark from {}'.format(args.ivector_ark))
    ivectors = ivector_ark_read(args.ivector_ark)
    io.log('Reading i-vector grouping from {}'.format(args.ivec2group))
    ivec2group = io.dict_read(args.ivec2group)
    group2ivecs = common.make_reverse_index(ivec2group)
    io.log('Reading i-vector to speaker mapping from {}'.format(args.ivec2spk))
    ivec2spk = io.dict_read(args.ivec2spk)
    spk2ivecs = common.make_reverse_index(ivec2spk)
    io.log('GMM covariance type: {}'.format(args.cov_type))

    # Leave-one-speaker-out evaluation
    spks = []
    corr_lbls = []
    pred_lbls = []
    for i, spk in enumerate(spk2ivecs):
        io.log('--- Held-out spk: {} ({} / {}) ---'.format(
            spk, i + 1, len(spk2ivecs)))
        # Pool the training i-vectors of all other speakers by group
        train_group2ivecs = OrderedDict()
        for other_spk in spk2ivecs:
            if other_spk == spk:
                continue
            for ivec in spk2ivecs[other_spk]:
                group = ivec2group[ivec]
                if group not in train_group2ivecs:
                    train_group2ivecs[group] = []
                train_group2ivecs[group].append(ivec)
        # Get test i-vectors
        test_ivecs = spk2ivecs[spk]
        # Get results
        try:
            preds = run(ivectors, train_group2ivecs, test_ivecs, args.cov_type)
            spks.extend([spk] * len(test_ivecs))
            corr_lbls.extend(map(lambda x: ivec2group[x], test_ivecs))
            pred_lbls.extend(preds)
        except RuntimeError:
            traceback.print_exc()
            io.log('...skipping {}'.format(spk))

    # Report results
    report(corr_lbls, pred_lbls, spks)
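# `run` is shared by these sanity-check scripts; from its call sites it fits
# one GMM per group on the training i-vectors and labels each test i-vector
# with the group whose model scores it highest. A minimal sketch assuming
# scikit-learn (whose covariance_type values match the --cov-type choices);
# the single-component GMM and the `_sketch` name are assumptions.
from collections import OrderedDict

import numpy as np
from sklearn.mixture import GaussianMixture


def run_sketch(ivectors, train_group2ivecs, test_ivecs, cov_type):
    gmms = OrderedDict()
    for group, train_keys in train_group2ivecs.items():
        X = np.vstack([ivectors[k] for k in train_keys])
        # Fitting can fail for groups with very few i-vectors, hence the
        # try/except guard around run() in the caller above
        gmms[group] = GaussianMixture(covariance_type=cov_type).fit(X)
    X_test = np.vstack([ivectors[k] for k in test_ivecs])
    # (num_groups, num_test) matrix of per-sample log-likelihoods
    scores = np.vstack([g.score_samples(X_test) for g in gmms.values()])
    groups = list(gmms.keys())
    return [groups[i] for i in np.argmax(scores, axis=0)]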
def main():
    desc = 'Convert from speaker i-vectors to utt i-vectors. Output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('spk_ivectors', help='File containing spk i-vectors.')
    parser.add_argument('utt2spk', help='Kaldi utt2spk mapping.')
    args = parser.parse_args()

    spk_ivectors = ivector_ark_read(args.spk_ivectors)
    utt2spk = io.dict_read(args.utt2spk, ordered=True)
    spk2utt = common.make_reverse_index(utt2spk, ordered=True)

    # Emit each speaker's i-vector once per utterance of that speaker
    wrote = 0
    for spk in spk2utt.keys():
        for utt in spk2utt[spk]:
            print_vector(utt, spk_ivectors[spk])
            wrote += 1
    io.log('Wrote {} utt i-vectors for {} spks'.format(wrote, len(spk2utt)))
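# `common.make_reverse_index` is used throughout these scripts to invert a
# mapping (e.g. utt2spk -> spk2utt). A minimal sketch, assuming each value
# maps to the list of keys that share it and that `ordered=True` preserves
# first-seen order; the `_sketch` name is hypothetical.
from collections import OrderedDict


def make_reverse_index_sketch(mapping, ordered=False):
    reverse = OrderedDict() if ordered else {}
    for key, value in mapping.items():
        reverse.setdefault(value, []).append(key)
    return reverse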
def main():
    desc = 'Convert from utt i-vectors to spk i-vectors. NOTE: this ' + \
           'script does not check the values of utt i-vectors that belong ' + \
           'to the same spk. It will simply treat the first utt i-vector ' + \
           'it finds from a spk as the i-vector for that spk. Output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('utt_ivectors', help='File containing utt i-vectors.')
    parser.add_argument('utt2spk', help='Kaldi utt2spk mapping.')
    args = parser.parse_args()

    utt_ivectors = ivector_ark_read(args.utt_ivectors, ordered=True)
    utt2spk = io.dict_read(args.utt2spk)

    # Keep the first utt i-vector seen for each speaker
    processed_spks = set()
    for utt in utt_ivectors.keys():
        spk = utt2spk[utt]
        if spk in processed_spks:
            continue
        print_vector(spk, utt_ivectors[utt])
        processed_spks.add(spk)
    io.log('Wrote {} spk i-vectors'.format(len(processed_spks)))
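# `print_vector` writes one i-vector to stdout in what is presumably Kaldi
# text format, so these scripts' output can be consumed as a text ark. A
# minimal sketch under that assumption; the `_sketch` name is hypothetical.
import sys


def print_vector_sketch(key, vec):
    sys.stdout.write('{}  [ {} ]\n'.format(
        key, ' '.join('{:g}'.format(v) for v in vec)))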
def main():
    desc = 'Outputs Kaldi-compatible log-likelihood to stdout using a pdnn ' + \
           'model. This mimics the design of Kaldi nnet-forward. Use this ' + \
           'for networks that cannot be converted to Kaldi, e.g. a factored model.'
    parser = common.init_argparse(desc)
    parser.add_argument('model_in', help='Model that can be read by load_dnn')
    parser.add_argument('feats_scp', help='scp of input features')
    parser.add_argument('--context', type=int, default=8,
                        help='Number of context frames for splicing')
    parser.add_argument('--padding', default='replicate',
                        help='What to do with out-of-bound frames. Valid ' +
                             'values: [replicate|zero]')
    parser.add_argument('--class-frame-counts', help='Kaldi vector with ' +
                        'frame-counts of pdfs to compute log-priors')
    parser.add_argument('--prior-floor', type=float, default=1e-10,
                        help='Flooring constant for prior probability, ' +
                             'i.e. pdfs with prior smaller than this ' +
                             'value will be ignored during decoding.')
    parser.add_argument('--ivectors', help='Utterance i-vectors to append')
    parser.add_argument('--chunk-size', default='300m',
                        help='Chunk size for data buffering')
    args = parser.parse_args()

    io.log('Initializing dataset')
    ivectors = None if args.ivectors is None else \
        ivector_ark_read(args.ivectors, dtype=theano.config.floatX)
    dataset = init_dataset(args.feats_scp, args.context, args.padding, ivectors)
    io.log('Initializing model')
    dnn = load_dnn(args.model_in)
    io.log('Initializing priors')
    log_priors = get_log_priors(args.class_frame_counts, args.prior_floor)

    # Initialize shared_x according to chunk_size
    num_items = get_num_items(args.chunk_size, theano.config.floatX)
    max_frames = num_items / dataset.get_dim()
    max_utt_frames = np.max(map(dataset.get_num_frames_by_utt_name,
                                dataset.get_utt_names()))
    common.CHK_GE(max_frames, max_utt_frames)
    x = np.zeros((max_frames, dataset.get_dim()), dtype=theano.config.floatX)
    shared_x = theano.shared(x, name='x', borrow=True)
    io.log('Using shared_x with size {} ({})'.format(x.shape, args.chunk_size))
    io.log('...getting output function')
    output_fn = dnn.build_output_function(shared_x)
    io.log('Got it!')

    io.log('** Begin outputting **')
    # Buffer utterances until the shared variable is full, then flush
    utt_names, utt_frames, total_frames = [], [], 0
    for utt in dataset.get_utt_names():
        frames = dataset.get_num_frames_by_utt_name(utt)
        if total_frames + frames > max_frames:
            __nnet_fwd(output_fn, dataset, x, shared_x,
                       utt_names, utt_frames, log_priors)
            utt_names, utt_frames, total_frames = [], [], 0
        utt_names.append(utt)
        utt_frames.append(frames)
        total_frames += frames
    # Flush the final partial buffer
    __nnet_fwd(output_fn, dataset, x, shared_x,
               utt_names, utt_frames, log_priors)
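# `get_log_priors` turns the --class-frame-counts vector into log-priors that
# get subtracted from the network's log-posteriors, mirroring Kaldi's
# nnet-forward. A minimal sketch, assuming the counts file is a Kaldi text
# vector (`[ c1 c2 ... cN ]`); the `_sketch` name is hypothetical, and Kaldi
# itself additionally penalizes floored pdfs so they lose during decoding.
import numpy as np


def get_log_priors_sketch(class_frame_counts, prior_floor):
    if class_frame_counts is None:
        return None
    with open(class_frame_counts) as f:
        txt = f.read().strip().lstrip('[').rstrip(']')
    counts = np.asarray(txt.split(), dtype=np.float64)
    priors = counts / counts.sum()
    priors[priors < prior_floor] = prior_floor  # floor tiny priors
    return np.log(priors)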
def main():
    desc = 'Outputs Kaldi-compatible log-likelihood to stdout using a ' + \
           'Keras model. This mimics the design of Kaldi nnet-forward.'
    parser = common.init_argparse(desc)
    parser.add_argument('model_json', help='JSON description of the model')
    parser.add_argument('model_weights', help='File containing model weights')
    parser.add_argument('feats_scp', help='scp of input features')
    parser.add_argument('--context', type=int, default=8,
                        help='Number of context frames for splicing')
    parser.add_argument('--padding', default='replicate',
                        help='What to do with out-of-bound frames. Valid ' +
                             'values: [replicate|zero]')
    parser.add_argument('--primary-task', type=int,
                        help='Set to enable multi-task model decoding')
    parser.add_argument('--nutts', type=int, default=10,
                        help='How many utterances to feed to the model at once')
    parser.add_argument('--delay', type=int, default=5,
                        help='Output delay in frames')
    parser.add_argument('--class-frame-counts', help='Kaldi vector with ' +
                        'frame-counts of pdfs to compute log-priors')
    parser.add_argument('--prior-floor', type=float, default=1e-10,
                        help='Flooring constant for prior probability, ' +
                             'i.e. pdfs with prior smaller than this ' +
                             'value will be ignored during decoding.')
    parser.add_argument('--ivectors', help='Utterance i-vectors to append')
    args = parser.parse_args()

    io.log('Initializing dataset')
    ivectors = None if args.ivectors is None else \
        ivector_ark_read(args.ivectors, dtype=np.float32)
    buf_ds = init_dataset(args.feats_scp, args.context, args.padding,
                          args.nutts, args.delay, ivectors)
    io.log('Initializing model')
    json_str = io.json_load(args.model_json)
    model = model_from_json(json_str)
    model.load_weights(args.model_weights)
    io.log('Initializing priors')
    log_priors = get_log_priors(args.class_frame_counts, args.prior_floor)
    if args.primary_task is not None:
        io.log('Multi-task decoding enabled, primary task {}'.format(
            args.primary_task))

    io.log('** Begin outputting **')
    while True:
        # Load data chunk
        chunk = buf_ds.read_next_chunk()
        if chunk is None:
            break
        Xs, _, eobs, utt_indices = chunk
        X = Xs[0]
        eob = eobs[0]
        utt_names = buf_ds.dataset().get_utt_names_by_utt_indices(utt_indices)
        y = model.predict(X, batch_size=len(utt_indices), verbose=0)
        if args.primary_task is not None:
            y = y[args.primary_task]
        # Convert posteriors to log-space in place
        y = np.log(y, y)
        if log_priors is not None:
            y -= log_priors
        # Trim the leading output delay and frames past the utterance end
        for i in range(len(utt_indices)):
            print_matrix(utt_names[i], y[i][buf_ds.get_delay():eob[i]])
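# `print_matrix` is the matrix counterpart of `print_vector`: one matrix of
# per-frame scores per utterance, presumably in Kaldi text format so a Kaldi
# decoder can read it from stdin. A minimal sketch under that assumption;
# the `_sketch` name is hypothetical.
import sys


def print_matrix_sketch(key, mat):
    sys.stdout.write('{}  [\n'.format(key))
    for i, row in enumerate(mat):
        line = '  ' + ' '.join('{:g}'.format(v) for v in row)
        sys.stdout.write(line + (' ]\n' if i == len(mat) - 1 else '\n'))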
def main():
    import argparse
    desc = 'Perform sanity check for i-vector grouping'
    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('ivector_ark', help='i-vectors ark file')
    parser.add_argument('ivec2group', help='i-vector to group mapping')
    parser.add_argument('--cov-type', default='diag',
                        help='GMM covariance type (full|tied|diag|spherical)')
    parser.add_argument('--withheld-frac', type=float,
                        help='Fraction of i-vectors withheld from each ' +
                             'group for testing. If not set, use the same ' +
                             'data for training and testing.')
    args = parser.parse_args()

    io.log('Reading i-vector ark from {}'.format(args.ivector_ark))
    ivectors = ivector_ark_read(args.ivector_ark)
    io.log('Reading i-vector grouping from {}'.format(args.ivec2group))
    ivec2group = io.dict_read(args.ivec2group)
    group2ivecs = common.make_reverse_index(ivec2group)
    io.log('GMM covariance type: {}'.format(args.cov_type))
    io.log('Withheld fraction: {}'.format(args.withheld_frac))

    # Fit GMM and do prediction
    if args.withheld_frac is None:
        corr_lbls = map(lambda x: ivec2group[x], ivectors.keys())
        pred_lbls = run(ivectors, group2ivecs, ivectors.keys(), args.cov_type)
    else:
        corr_lbls = []
        pred_lbls = []
        for group in group2ivecs:
            # Common base of training i-vectors: all other groups
            train_group2ivecs = OrderedDict()
            for other_group in group2ivecs:
                if other_group != group:
                    train_group2ivecs[other_group] = group2ivecs[other_group]
            # Get partitions of test i-vectors and step through each one
            test_partitions = partition(group2ivecs[group], args.withheld_frac)
            for i in range(len(test_partitions)):
                io.log('-- Partition {} / {} for {}'.format(
                    i + 1, len(test_partitions), group))
                test_ivecs = test_partitions[i]
                # Get training i-vectors for this group: all other partitions
                train_ivecs = []
                for j in range(len(test_partitions)):
                    if j != i:
                        train_ivecs.extend(test_partitions[j])
                train_group2ivecs[group] = train_ivecs
                # Get results
                corr_lbls.extend(map(lambda x: ivec2group[x], test_ivecs))
                pred_lbls.extend(run(ivectors, train_group2ivecs,
                                     test_ivecs, args.cov_type))

    # Report results
    acc = 100 * accuracy_score(corr_lbls, pred_lbls)
    print 'Overall accuracy: {:.2f} (%)'.format(acc)
    group_accs, group_names = comp_UARs(corr_lbls, pred_lbls)
    print 'Mean per-group accuracy: {:.2f} (%)'.format(100 * np.mean(group_accs))
    print 'Individual group accuracies:'
    for group_acc, group_name in zip(group_accs, group_names):
        print '\t{} - {:.2f} (%)'.format(group_name, 100 * group_acc)
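# `partition` splits one group's i-vectors into held-out folds: with
# --withheld-frac 0.25, each fold holds roughly a quarter of the keys, giving
# a 4-fold cross-validation over that group. A minimal sketch under that
# reading; the `_sketch` name is hypothetical.
def partition_sketch(items, frac):
    n_parts = max(1, int(round(1.0 / frac)))
    size = (len(items) + n_parts - 1) // n_parts  # ceiling division
    return [items[i:i + size] for i in range(0, len(items), size)]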