Example #1
def _init_ivectors(self):
    """Add i-vectors if applicable."""
    args = self.args
    self.ivectors = None
    if args.ivectors is not None:
        io.log('Loading i-vectors from {}'.format(args.ivectors))
        self.ivectors = ivector_ark_read(args.ivectors)
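
The ivector_ark_read helper these examples rely on is not shown on this page. Below is a minimal sketch of what it plausibly does, assuming the i-vectors are stored in Kaldi's text vector-ark format (one "key  [ v1 v2 ... vN ]" entry per line); the name ivector_ark_read_sketch is hypothetical:

from collections import OrderedDict
import numpy as np

def ivector_ark_read_sketch(fname, dtype=np.float32):
    """Read i-vectors from a Kaldi text-format vector ark into an OrderedDict."""
    ivectors = OrderedDict()
    with open(fname) as f:
        for line in f:
            key, rest = line.split(None, 1)
            # Strip the surrounding "[ ... ]" before parsing the numbers.
            values = rest.strip().lstrip('[').rstrip(']').split()
            ivectors[key] = np.asarray(values, dtype=dtype)
    return ivectors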
Example #2
def main():
    import argparse
    desc = 'Perform sanity check for i-vector grouping'
    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('ivector_ark', help='i-vectors ark file')
    parser.add_argument('ivec2group', help='i-vector to group mapping')
    parser.add_argument('train_keys', help='Keys for training set')
    parser.add_argument('--cov-type',
                        default='diag',
                        help='GMM covariance type (full|tied|diag|spherical)')
    parser.add_argument('--ignore',
                        nargs='+',
                        default=[],
                        help='Ignore these groups')
    parser.add_argument('--test-keys', help='Keys for test set. If not ' + \
                        'specified, treat keys not in training set as test.')
    args = parser.parse_args()

    io.log('Reading i-vector ark from {}'.format(args.ivector_ark))
    ivectors = ivector_ark_read(args.ivector_ark)
    io.log('Reading i-vector grouping from {}'.format(args.ivec2group))
    ivec2group = io.dict_read(args.ivec2group)
    io.log('Ignore list: {}'.format(args.ignore))
    all_keys = [x for x in ivec2group if ivec2group[x] not in args.ignore]
    io.log('Reading training keys from {}'.format(args.train_keys))
    train_keys = set(io.read_lines(args.train_keys))
    test_keys = None
    if args.test_keys is not None:
        io.log('Reading test keys from {}'.format(args.test_keys))
        test_keys = set(io.read_lines(args.test_keys))

    train_ivec2group, test_ivec2group = OrderedDict(), OrderedDict()
    for k in all_keys:
        if k in ivectors:
            if k in train_keys:
                train_ivec2group[k] = ivec2group[k]
            elif test_keys is None or k in test_keys:
                test_ivec2group[k] = ivec2group[k]
    test_keys = list(test_ivec2group.keys())
    io.log('Train: {}, Test: {}'.format(len(train_ivec2group), len(test_keys)))
    train_group2ivecs = common.make_reverse_index(train_ivec2group)

    io.log('GMM covariance type: {}'.format(args.cov_type))
    # Fit GMM and do prediction
    corr_lbls = [test_ivec2group[x] for x in test_keys]
    pred_lbls = run(ivectors, train_group2ivecs, test_keys, args.cov_type)
    # Report results
    acc = 100 * accuracy_score(corr_lbls, pred_lbls)
    print('Overall accuracy: {:.2f} (%)'.format(acc))
    group_accs, group_names = comp_UARs(corr_lbls, pred_lbls)
    print('Mean per-group accuracy: {:.2f} (%)'.format(
        100 * np.mean(group_accs)))
    print('Individual group accuracies:')
    for group_acc, group_name in zip(group_accs, group_names):
        print('\t{} - {} (%)'.format(group_name, 100 * group_acc))
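
The run() function called above is not included in these examples. A plausible minimal sketch, assuming it fits one scikit-learn GaussianMixture per group and predicts the group with the highest log-likelihood (run_sketch and its exact signature are assumptions, not the original implementation):

import numpy as np
from sklearn.mixture import GaussianMixture

def run_sketch(ivectors, train_group2ivecs, test_keys, cov_type):
    # Fit one GMM per group on that group's training i-vectors.
    gmms = {}
    for group, keys in train_group2ivecs.items():
        X = np.vstack([ivectors[k] for k in keys])
        gmms[group] = GaussianMixture(covariance_type=cov_type).fit(X)
    groups = list(gmms)
    # Score every test i-vector under every group's GMM and pick the best.
    X_test = np.vstack([ivectors[k] for k in test_keys])
    scores = np.column_stack([gmms[g].score_samples(X_test) for g in groups])
    return [groups[i] for i in scores.argmax(axis=1)]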
Example #3
def main():
    import argparse
    desc = 'Perform sanity check for i-vector grouping'
    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('ivector_ark', help='i-vectors ark file')
    parser.add_argument('ivec2group', help='i-vector to group mapping')
    parser.add_argument('ivec2spk', help='i-vector to speaker mapping')
    parser.add_argument('--cov-type',
                        default='diag',
                        help='GMM covariance type (full|tied|diag|spherical)')
    args = parser.parse_args()

    io.log('Reading i-vector ark from {}'.format(args.ivector_ark))
    ivectors = ivector_ark_read(args.ivector_ark)
    io.log('Reading i-vector grouping from {}'.format(args.ivec2group))
    ivec2group = io.dict_read(args.ivec2group)
    group2ivecs = common.make_reverse_index(ivec2group)
    io.log('Reading i-vector to speaker mapping from {}'.format(args.ivec2spk))
    ivec2spk = io.dict_read(args.ivec2spk)
    spk2ivecs = common.make_reverse_index(ivec2spk)

    io.log('GMM covariance type: {}'.format(args.cov_type))
    spks = []
    corr_lbls = []
    pred_lbls = []
    for i, spk in enumerate(spk2ivecs):
        io.log('--- Held-out spk: {} ({} / {}) ---'.format(
            spk, i + 1, len(spk2ivecs)))
        # Common base for training i-vectors
        train_group2ivecs = OrderedDict()
        for other_spk in spk2ivecs:
            if other_spk == spk:
                continue
            for ivec in spk2ivecs[other_spk]:
                group = ivec2group[ivec]
                if group not in train_group2ivecs:
                    train_group2ivecs[group] = []
                train_group2ivecs[group].append(ivec)
        # Get test i-vectors
        test_ivecs = spk2ivecs[spk]
        # Get results
        try:
            preds = run(ivectors, train_group2ivecs, test_ivecs, args.cov_type)
            spks.extend([spk] * len(test_ivecs))
            corr_lbls.extend(ivec2group[x] for x in test_ivecs)
            pred_lbls.extend(preds)
        except RuntimeError:
            traceback.print_exc()
            io.log('...skipping {}'.format(spk))

    # Report results
    report(corr_lbls, pred_lbls, spks)
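
common.make_reverse_index, used throughout these examples, is also not shown. Here is a minimal sketch under the assumption that it inverts a key -> label mapping into label -> [keys] (make_reverse_index_sketch is a hypothetical name):

from collections import OrderedDict

def make_reverse_index_sketch(mapping, ordered=False):
    reverse = OrderedDict() if ordered else {}
    for key, label in mapping.items():
        # Group keys by their label, preserving first-seen label order.
        reverse.setdefault(label, []).append(key)
    return reverse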
Example #4
def main():
    desc = 'Convert from speaker i-vectors to utt i-vectors. Output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('spk_ivectors', help='File containing spk i-vectors.')
    parser.add_argument('utt2spk', help='Kaldi utt2spk mapping.')
    args = parser.parse_args()

    spk_ivectors = ivector_ark_read(args.spk_ivectors)
    utt2spk = io.dict_read(args.utt2spk, ordered=True)
    spk2utt = common.make_reverse_index(utt2spk, ordered=True)

    wrote = 0
    for spk in spk2utt.keys():
        for utt in spk2utt[spk]:
            print_vector(utt, spk_ivectors[spk])
            wrote += 1
    io.log('Wrote {} utt i-vectors for {} spks'.format(wrote, len(spk2utt)))
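
print_vector is not defined on this page either. Assuming it emits one Kaldi text-format vector entry per call, a sketch might look like this (the exact float formatting is a guess):

def print_vector_sketch(key, vec):
    # Kaldi text vector entry: "<key>  [ v1 v2 ... vN ]".
    print('{}  [ {} ]'.format(key, ' '.join('{:g}'.format(v) for v in vec)))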
Example #5
def main():
    desc = 'Convert from utt i-vectors to spk i-vectors. NOTE: this ' + \
           'script does not check the values of utt i-vectors that belong ' + \
           'to the same spk. It will simply treat the first utt i-vector ' + \
           'it finds from a spk as the i-vector for that spk. Output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('utt_ivectors', help='File containing utt i-vectors.')
    parser.add_argument('utt2spk', help='Kaldi utt2spk mapping.')
    args = parser.parse_args()

    utt_ivectors = ivector_ark_read(args.utt_ivectors, ordered=True)
    utt2spk = io.dict_read(args.utt2spk)

    processed_spks = set()
    for utt in utt_ivectors.keys():
        spk = utt2spk[utt]
        if spk in processed_spks:
            continue
        print_vector(spk, utt_ivectors[utt])
        processed_spks.add(spk)
    io.log('Wrote {} spk i-vectors'.format(len(processed_spks)))
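
io.dict_read, used here for the utt2spk mapping, presumably reads a two-column Kaldi-style text file into a dict. A minimal sketch under that assumption (dict_read_sketch is a hypothetical name):

from collections import OrderedDict

def dict_read_sketch(fname, ordered=False):
    d = OrderedDict() if ordered else {}
    with open(fname) as f:
        for line in f:
            # Each line maps a key to a value, e.g. "<utt> <spk>".
            key, value = line.split()
            d[key] = value
    return d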
Example #6
def main():
    desc = 'Outputs Kaldi-compatible log-likelihood to stdout using a pdnn ' + \
           'model. This mimics the design of Kaldi nnet-forward. Use this ' + \
           'for networks that cannot be converted to Kaldi, e.g. factored model'
    parser = common.init_argparse(desc)
    parser.add_argument('model_in', help='Model that can be read by load_dnn')
    parser.add_argument('feats_scp', help='scp of input features')
    parser.add_argument('--context',
                        type=int,
                        default=8,
                        help='Number of context frames for splicing')
    parser.add_argument('--padding', default='replicate',
                        help='What to do with out-of-bound frames. Valid ' + \
                             'values: [replicate|zero]')
    parser.add_argument('--class-frame-counts', help='Kaldi vector with ' + \
                        'frame-counts of pdfs to compute log-priors')
    parser.add_argument('--prior-floor', type=float, default=1e-10,
                        help='Flooring constant for prior probability, ' + \
                             'i.e. pdfs with prior smaller than this ' + \
                             'value will be ignored during decoding.')
    parser.add_argument('--ivectors', help='Utterance i-vectors to append')
    parser.add_argument('--chunk-size',
                        default='300m',
                        help='Chunk size for data buffering')
    args = parser.parse_args()

    io.log('Initializing dataset')
    ivectors = None if args.ivectors is None else \
            ivector_ark_read(args.ivectors, dtype=theano.config.floatX)
    dataset = init_dataset(args.feats_scp, args.context, args.padding,
                           ivectors)
    io.log('Initializing model')
    dnn = load_dnn(args.model_in)
    io.log('Initializing priors')
    log_priors = get_log_priors(args.class_frame_counts, args.prior_floor)

    # Initializing shared_ds according to chunk_size
    num_items = get_num_items(args.chunk_size, theano.config.floatX)
    max_frames = num_items // dataset.get_dim()
    max_utt_frames = max(dataset.get_num_frames_by_utt_name(u)
                         for u in dataset.get_utt_names())
    common.CHK_GE(max_frames, max_utt_frames)
    x = np.zeros((max_frames, dataset.get_dim()), dtype=theano.config.floatX)
    shared_x = theano.shared(x, name='x', borrow=True)
    io.log('Using shared_x with size {} ({})'.format(x.shape, args.chunk_size))
    io.log('...getting output function')
    output_fn = dnn.build_output_function(shared_x)
    io.log('Got it!')

    io.log('** Begin outputting **')
    utt_names, utt_frames, total_frames = [], [], 0
    for utt in dataset.get_utt_names():
        frames = dataset.get_num_frames_by_utt_name(utt)
        if total_frames + frames > max_frames:
            __nnet_fwd(output_fn, dataset, x, shared_x, utt_names, utt_frames,
                       log_priors)
            utt_names, utt_frames, total_frames = [], [], 0
        utt_names.append(utt)
        utt_frames.append(frames)
        total_frames += frames
    __nnet_fwd(output_fn, dataset, x, shared_x, utt_names, utt_frames,
               log_priors)
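
get_log_priors is not shown. Mirroring what Kaldi's nnet-forward does with --class-frame-counts, a plausible sketch normalizes the counts into priors, floors tiny values, and returns their log (the Kaldi text-vector file format is an assumption):

import numpy as np

def get_log_priors_sketch(class_frame_counts, prior_floor):
    if class_frame_counts is None:
        return None
    # Assumes a Kaldi text vector file: "[ c1 c2 ... cN ]".
    with open(class_frame_counts) as f:
        text = f.read().strip().lstrip('[').rstrip(']')
    counts = np.array(text.split(), dtype=np.float64)
    priors = counts / counts.sum()
    # Floor tiny priors so the corresponding pdfs are effectively ignored.
    priors[priors < prior_floor] = prior_floor
    return np.log(priors)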
Example #7
def main():
    desc = 'Outputs Kaldi-compatible log-likelihood to stdout using a ' + \
           'Keras model. This mimics the design of Kaldi nnet-forward.'
    parser = common.init_argparse(desc)
    parser.add_argument('model_json', help='JSON description of the model')
    parser.add_argument('model_weights', help='File containing model weights')
    parser.add_argument('feats_scp', help='scp of input features')
    parser.add_argument('--context', type=int, default=8,
                        help='Number of context frames for splicing')
    parser.add_argument('--padding', default='replicate',
                        help='What to do with out-of-bound frames. Valid ' + \
                             'values: [replicate|zero]')
    parser.add_argument('--primary-task', type=int,
                        help='Set to enable multi-task model decoding')
    parser.add_argument('--nutts', type=int, default=10,
                        help='How many utterances to feed to the model at once')
    parser.add_argument('--delay', type=int, default=5,
                        help='Output delay in frames')
    parser.add_argument('--class-frame-counts', help='Kaldi vector with ' + \
                        'frame-counts of pdfs to compute log-priors')
    parser.add_argument('--prior-floor', type=float, default=1e-10,
                        help='Flooring constant for prior probability, ' + \
                             'i.e. pdfs with prior smaller than this ' + \
                             'value will be ignored during decoding.')
    parser.add_argument('--ivectors', help='Utterance i-vectors to append')
    args = parser.parse_args()

    io.log('Initializing dataset')
    ivectors = None if args.ivectors is None else \
            ivector_ark_read(args.ivectors, dtype=np.float32)
    buf_ds = init_dataset(
        args.feats_scp, args.context, args.padding,
        args.nutts, args.delay, ivectors
    )
    io.log('Initializing model')
    json_str = io.json_load(args.model_json)
    model = model_from_json(json_str)
    model.load_weights(args.model_weights)
    io.log('Initializing priors')
    log_priors = get_log_priors(args.class_frame_counts, args.prior_floor)
    if args.primary_task is not None:
        io.log('Multi-task decoding enabled, primary task {}'.format(
            args.primary_task))

    io.log('** Begin outputting **')
    while True:
        # Load data chunk
        chunk = buf_ds.read_next_chunk()
        if chunk is None:
            break
        Xs, _, eobs, utt_indices = chunk
        X = Xs[0]
        eob = eobs[0]
        utt_names = buf_ds.dataset().get_utt_names_by_utt_indices(utt_indices)
        y = model.predict(X, batch_size=len(utt_indices), verbose=0)
        if args.primary_task is not None:
            y = y[args.primary_task]
        y = np.log(y, out=y)  # take the log in place
        if log_priors is not None:
            y -= log_priors
        for i in range(len(utt_indices)):
            print_matrix(utt_names[i], y[i][buf_ds.get_delay():eob[i]])
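
print_matrix, the per-utterance output call above, is likewise not shown. Assuming it writes one Kaldi text-format matrix entry to stdout, a sketch could be:

def print_matrix_sketch(key, mat):
    # Kaldi text matrix entry: "<key>  [", one row per line, "]" at the end.
    print('{}  ['.format(key))
    for row in mat:
        print('  ' + ' '.join('{:g}'.format(v) for v in row))
    print(']')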
Example #8
def main():
    import argparse
    desc = 'Perform sanity check for i-vector grouping'
    parser = argparse.ArgumentParser(
            description=desc,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('ivector_ark', help='i-vectors ark file')
    parser.add_argument('ivec2group', help='i-vector to group mapping')
    parser.add_argument('--cov-type', default='diag',
                        help='GMM covariance type (full|tied|diag|spherical)')
    parser.add_argument('--withheld-frac', type=float,
                        help='Fraction of i-vectors withheld from each ' + \
                             'group for testing. If not set, use the same ' + \
                             'data for training and testing.')
    args = parser.parse_args()

    io.log('Reading i-vector ark from {}'.format(args.ivector_ark))
    ivectors = ivector_ark_read(args.ivector_ark)
    io.log('Reading i-vector grouping from {}'.format(args.ivec2group))
    ivec2group = io.dict_read(args.ivec2group)
    group2ivecs = common.make_reverse_index(ivec2group)

    io.log('GMM covariance type: {}'.format(args.cov_type))
    io.log('Withheld fraction: {}'.format(args.withheld_frac))
    # Fit GMM and do prediction
    if args.withheld_frac is None:
        test_keys = list(ivectors.keys())
        corr_lbls = [ivec2group[x] for x in test_keys]
        pred_lbls = run(ivectors, group2ivecs, test_keys, args.cov_type)
    else:
        corr_lbls = []
        pred_lbls = []
        for group in group2ivecs:
            # Common base for training i-vectors
            train_group2ivecs = OrderedDict()
            for other_group in group2ivecs:
                if other_group != group:
                    train_group2ivecs[other_group] = group2ivecs[other_group]
            # Get partitions of test i-vectors and step through each one
            test_partitions = partition(group2ivecs[group], args.withheld_frac)
            for i in range(len(test_partitions)):
                io.log('-- Partition {} / {} for {}'.format(
                    i + 1, len(test_partitions), group
                ))
                test_ivecs = test_partitions[i]
                # Get training i-vectors for this group
                train_ivecs = []
                for j in range(len(test_partitions)):
                    if j != i:
                        train_ivecs.extend(test_partitions[j])
                train_group2ivecs[group] = train_ivecs
                # Get results
                corr_lbls.extend(ivec2group[x] for x in test_ivecs)
                pred_lbls.extend(run(ivectors, train_group2ivecs,
                                     test_ivecs, args.cov_type))

    # Report results
    acc = 100 * accuracy_score(corr_lbls, pred_lbls)
    print('Overall accuracy: {:.2f} (%)'.format(acc))
    group_accs, group_names = comp_UARs(corr_lbls, pred_lbls)
    print('Mean per-group accuracy: {:.2f} (%)'.format(
        100 * np.mean(group_accs)))
    print('Individual group accuracies:')
    for group_acc, group_name in zip(group_accs, group_names):
        print('\t{} - {} (%)'.format(group_name, 100 * group_acc))
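
The partition() helper in this last example is not shown. A plausible sketch, assuming it splits a group's i-vector keys into consecutive chunks of roughly withheld_frac * len(items) each, so that each chunk can serve as a held-out test fold (partition_sketch is a hypothetical name):

import math

def partition_sketch(items, frac):
    size = max(1, int(math.ceil(len(items) * frac)))
    # Consecutive slices of ~frac of the items; the last may be smaller.
    return [items[i:i + size] for i in range(0, len(items), size)]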