Code Example #1
File: phone2word_ali.py Project: ducle90/chai_share
def main():
    desc = 'Convert phone to word alignment. Output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('ali_phones_with_length',
                        help='File containing phone alignment with length ' + \
                        '(generated with ali-to-phones --write-lengths=true)')
    parser.add_argument('text', help='Kaldi word-level transcript')
    parser.add_argument('phone_map', help='Mapping from text to phone ID. ' + \
                        'We expect each line to have two tokens separated ' + \
                        'by whitespace, where the first token is the phone ' + \
                        'and the second token is the ID number.')
    parser.add_argument('lexicon', help='Pronunciation lexicon')
    parser.add_argument('--sil-phones',
                        nargs='+',
                        default=[],
                        help='IDs of phones regarded as silence')
    parser.add_argument('--sil-label',
                        default='sil',
                        help='Label of silence phone/word to use in output')
    args = parser.parse_args()

    alis = ali_with_length_read(args.ali_phones_with_length,
                                ordered=True,
                                expand=False)
    io.log('Loaded {} alignments'.format(len(alis)))
    text = io.dict_read(args.text, lst=True)
    io.log('Loaded transcript containing {} utterances'.format(len(text)))
    phone2id = io.dict_read(args.phone_map)
    io.log('Loaded phone2id containing {} phones'.format(len(phone2id)))
    id2phone = {}
    # We normalize the phone name so that IDs of phone variants will map to
    # the primary phone. For example, IDs of sil, sil_B, sil_E, sil_I, sil_S
    # will all map to sil. The assumption here is that anything after and
    # including the '_' character is not part of the primary phone name.
    for phone in phone2id.keys():
        nphone = phone.split('_')[0]
        id2phone[phone2id[phone]] = nphone
    io.log('Total phones in id2phone: {}'.format(len(set(id2phone.values()))))
    lexicon = io.lexicon_read(args.lexicon)
    io.log('Loaded lexicon containing {} words'.format(len(lexicon)))
    sil_phones = set(args.sil_phones)
    io.log('sil_phones: {} ({}), sil_label: {}'.format(
        sil_phones, [id2phone[i] for i in sil_phones], args.sil_label))

    for key in alis:
        phone_tokens, length = get_phone_tokens(alis[key], id2phone,
                                                sil_phones)
        if len(phone_tokens) == 0:
            io.log('WARNING: {} - no non-silence tokens'.format(key))
            continue
        if key not in text:
            io.log('WARNING: {} not in text'.format(key))
            continue
        phone2word_ali(key, phone_tokens, text[key], lexicon, args.sil_label,
                       length)
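Nearly every example below calls io.dict_read. A minimal sketch of what it might look like, inferred purely from the call sites in these excerpts (the ordered, lst, and fn keywords and their semantics are assumptions, not the actual chai_share implementation):

from collections import OrderedDict

def dict_read(fname, ordered=False, lst=False, fn=None):
    """Read 'key value...' lines into a dict (sketch, not the real io.dict_read)."""
    d = OrderedDict() if ordered else {}
    with open(fname) as f:
        for line in f:
            ary = line.strip().split()
            if not ary:
                continue
            # lst=True keeps all remaining tokens; otherwise take the first value
            val = ary[1:] if lst else ary[1]
            if fn is not None:
                val = [fn(v) for v in val] if lst else fn(val)
            d[ary[0]] = val
    return d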
Code Example #2
def main():
    import argparse
    desc = 'Perform sanity check for i-vector grouping'
    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('ivector_ark', help='i-vectors ark file')
    parser.add_argument('ivec2group', help='i-vector to group mapping')
    parser.add_argument('ivec2spk', help='i-vector to speaker mapping')
    parser.add_argument('--cov-type',
                        default='diag',
                        help='GMM covariance type (full|tied|diag|spherical)')
    args = parser.parse_args()

    io.log('Reading i-vector ark from {}'.format(args.ivector_ark))
    ivectors = ivector_ark_read(args.ivector_ark)
    io.log('Reading i-vector grouping from {}'.format(args.ivec2group))
    ivec2group = io.dict_read(args.ivec2group)
    group2ivecs = common.make_reverse_index(ivec2group)
    io.log('Reading i-vector to speaker mapping from {}'.format(args.ivec2spk))
    ivec2spk = io.dict_read(args.ivec2spk)
    spk2ivecs = common.make_reverse_index(ivec2spk)

    io.log('GMM covariance type: {}'.format(args.cov_type))
    spks = []
    corr_lbls = []
    pred_lbls = []
    for i, spk in enumerate(spk2ivecs):
        io.log('--- Held-out spk: {} ({} / {}) ---'.format(
            spk, i + 1, len(spk2ivecs)))
        # Common base for training i-vectors
        train_group2ivecs = OrderedDict()
        for other_spk in spk2ivecs:
            if other_spk == spk:
                continue
            for ivec in spk2ivecs[other_spk]:
                group = ivec2group[ivec]
                if group not in train_group2ivecs:
                    train_group2ivecs[group] = []
                train_group2ivecs[group].append(ivec)
        # Get test i-vectors
        test_ivecs = spk2ivecs[spk]
        # Get results
        try:
            preds = run(ivectors, train_group2ivecs, test_ivecs, args.cov_type)
            spks.extend([spk] * len(test_ivecs))
            corr_lbls.extend(map(lambda x: ivec2group[x], test_ivecs))
            pred_lbls.extend(preds)
        except RuntimeError:
            traceback.print_exc()
            io.log('...skipping {}'.format(spk))

    # Report results
    report(corr_lbls, pred_lbls, spks)
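common.make_reverse_index appears in several of these examples. A plausible sketch, judging only from how it is called here (assumed, not the actual chai_share code): invert a key-to-value mapping into a value-to-list-of-keys index.

from collections import OrderedDict

def make_reverse_index(d, ordered=False):
    """Invert key->value into value->[keys] (sketch based on usage above)."""
    rev = OrderedDict() if ordered else {}
    for k in d:
        rev.setdefault(d[k], []).append(k)
    return rev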
Code Example #3
File: id2text.py Project: ducle90/chai_share
def main():
    desc = 'Kaldi outputs token IDs in numbers. We can map them back to ' + \
            'textual form given an ID to text mapping. Will output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('fname', help='File to process. We expect each line ' + \
                        'to have tokens separated by whitespace, where ' + \
                        'the first token is a key or name (e.g. utt name) ' + \
                        'that can be skipped, and the rest are ID numbers.')
    parser.add_argument('id_map', help='Mapping from textual form to ID. ' + \
                        'We expect each line to have two tokens separated ' + \
                        'by whitespace, where the first token is the text ' + \
                        'and the second token is the ID number.')
    args = parser.parse_args()

    id_map = common.make_reverse_index(io.dict_read(args.id_map))
    # Check that mapping from number to text is 1-to-1
    for k in id_map.keys():
        if len(id_map[k]) != 1:
            raise ValueError('Mapping at {} not 1-1: {}'.format(k, id_map[k]))
        id_map[k] = id_map[k][0]

    with open(args.fname, 'r') as f:
        for line in f:
            ary = line.strip().split()
            for i in range(1, len(ary)):
                ary[i] = id_map[ary[i]]
            print ' '.join(ary)
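For example, if id_map contains the line "hello 42" and an input line reads "utt1 42 42", the script prints "utt1 hello hello".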
Code Example #4
def main():
    import argparse
    desc = 'Perform sanity check for i-vector grouping'
    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('ivector_ark', help='i-vectors ark file')
    parser.add_argument('ivec2group', help='i-vector to group mapping')
    parser.add_argument('train_keys', help='Keys for training set')
    parser.add_argument('--cov-type',
                        default='diag',
                        help='GMM covariance type (full|tied|diag|spherical)')
    parser.add_argument('--ignore',
                        nargs='+',
                        default=[],
                        help='Ignore these groups')
    parser.add_argument('--test-keys', help='Keys for test set. If not ' + \
                        'specified, treat keys not in training set as test.')
    args = parser.parse_args()

    io.log('Reading i-vector ark from {}'.format(args.ivector_ark))
    ivectors = ivector_ark_read(args.ivector_ark)
    io.log('Reading i-vector grouping from {}'.format(args.ivec2group))
    ivec2group = io.dict_read(args.ivec2group)
    io.log('Ignore list: {}'.format(args.ignore))
    all_keys = [x for x in ivec2group if ivec2group[x] not in args.ignore]
    io.log('Reading training keys from {}'.format(args.train_keys))
    train_keys = set(io.read_lines(args.train_keys))
    test_keys = None
    if args.test_keys is not None:
        io.log('Reading test keys from {}'.format(args.test_keys))
        test_keys = set(io.read_lines(args.test_keys))

    train_ivec2group, test_ivec2group = OrderedDict(), OrderedDict()
    for k in all_keys:
        if k in ivectors:
            if k in train_keys:
                train_ivec2group[k] = ivec2group[k]
            elif test_keys is None or k in test_keys:
                test_ivec2group[k] = ivec2group[k]
    test_keys = test_ivec2group.keys()
    io.log('Train: {}, Test: {}'.format(len(train_keys), len(test_keys)))
    train_group2ivecs = common.make_reverse_index(train_ivec2group)

    io.log('GMM covariance type: {}'.format(args.cov_type))
    # Fit GMM and do prediction
    corr_lbls = map(lambda x: test_ivec2group[x], test_keys)
    pred_lbls = run(ivectors, train_group2ivecs, test_keys, args.cov_type)
    # Report results
    acc = 100 * accuracy_score(corr_lbls, pred_lbls)
    print 'Overall accuracy: {:.2f} (%)'.format(acc)
    group_accs, group_names = comp_UARs(corr_lbls, pred_lbls)
    print 'Mean per-group accuracy: {:.2f} (%)'.format(100 *
                                                       np.mean(group_accs))
    print 'Individual group accuracies:'
    for group_acc, group_name in zip(group_accs, group_names):
        print '\t{} - {} (%)'.format(group_name, 100 * group_acc)
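comp_UARs is not shown in these excerpts. Judging from how its results are reported (a list of per-group accuracies plus group names, averaged into a mean per-group accuracy), it plausibly computes per-group recall, i.e. the components of the unweighted average recall (UAR). A minimal sketch under that assumption, not the actual chai_share implementation:

def comp_UARs(corr_lbls, pred_lbls):
    """Per-group recall for each group appearing in corr_lbls (sketch)."""
    group_accs, group_names = [], []
    for group in set(corr_lbls):
        idxs = [i for i, c in enumerate(corr_lbls) if c == group]
        n_correct = sum(1 for i in idxs if pred_lbls[i] == group)
        group_accs.append(float(n_correct) / len(idxs))
        group_names.append(group)
    return group_accs, group_names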
Code Example #5
def main(args):
    ds = TemporalData.from_kaldi(args.scp)
    io.log('Loaded dataset containing {} utts'.format(len(ds.get_utt_names())))
    utt2label = io.dict_read(args.utt2label)
    io.log('Loaded utt2label containing {} entries'.format(len(utt2label)))

    for utt_name in ds.get_utt_names():
        if utt_name not in utt2label:
            io.log('WARNING: {} not in utt2label, skipping'.format(utt_name))
            # Without this continue, the lookup below would raise a KeyError
            # instead of skipping as the warning promises.
            continue
        lbl = utt2label[utt_name]
        dur = ds.get_num_frames_by_utt_name(utt_name)
        print '{} {}'.format(utt_name, ' '.join([lbl] * dur))
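For example, an utterance "utt1" labeled "happy" that spans 3 frames produces the line "utt1 happy happy happy".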
Code Example #6
def _init_records(self):
    """ Set up result caching for parameter combinations. Load existing
    results from disk if possible. Each line will look like this:
        <model_name> <train_err> <valid_err> [<valid_err> ...]
    """
    self.records_fname = os.path.join(self.args.output_dir, 'summary.txt')
    self.records = OrderedDict()
    if os.path.exists(self.records_fname):
        io.log('Loading existing records from {}'.format(
            self.records_fname))
        self.records = io.dict_read(self.records_fname,
                                    ordered=True,
                                    lst=True,
                                    fn=float)
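With lst=True and fn=float, a summary line such as "mlp_512x3 1.23 2.34 2.31" (a hypothetical model name, for illustration) would load as self.records['mlp_512x3'] == [1.23, 2.34, 2.31].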
Code Example #7
def main():
    desc = 'Convert from speaker i-vectors to utt-ivectors. Output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('spk_ivectors', help='File containing spk i-vectors.')
    parser.add_argument('utt2spk', help='Kaldi utt2spk mapping.')
    args = parser.parse_args()

    spk_ivectors = ivector_ark_read(args.spk_ivectors)
    utt2spk = io.dict_read(args.utt2spk, ordered=True)
    spk2utt = common.make_reverse_index(utt2spk, ordered=True)

    wrote = 0
    for spk in spk2utt.keys():
        for utt in spk2utt[spk]:
            print_vector(utt, spk_ivectors[spk])
            wrote += 1
    io.log('Wrote {} utt i-vectors for {} spks'.format(wrote, len(spk2utt)))
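ivector_ark_read, used here and in Examples #2 and #4, is another helper not shown. A rough sketch, assuming the input is a Kaldi text-format ark with one vector per line ("key  [ v1 v2 ... ]"); this is an inference from the call sites, not the actual implementation:

import numpy
from collections import OrderedDict

def ivector_ark_read(fname, ordered=False):
    """Parse 'key  [ v1 v2 ... ]' lines into key->numpy array (sketch)."""
    ivecs = OrderedDict() if ordered else {}
    with open(fname) as f:
        for line in f:
            ary = line.strip().split()
            if not ary:
                continue
            # The vector values sit between the '[' and ']' tokens
            vals = ary[ary.index('[') + 1:ary.index(']')]
            ivecs[ary[0]] = numpy.asarray(vals, dtype=float)
    return ivecs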
Code Example #8
def main():
    desc = 'Reads in a pdf alignment and output prior counts to disk.'
    parser = common.init_argparse(desc)
    parser.add_argument('alipdf', help='pdf alignment file.')
    parser.add_argument('output_fname', help='File to output prior counts to')
    parser.add_argument('--num-pdfs', type=int, help='Number of pdfs. ' + \
                        'If not set, use max value in `alipdf`.')
    args = parser.parse_args()

    alipdf = io.dict_read(args.alipdf)
    pdfs = []
    for utt in alipdf.keys():
        pdfs.extend(numpy.asarray(alipdf[utt], dtype=numpy.int))
    bins = numpy.bincount(pdfs, minlength=args.num_pdfs)

    fw = open(args.output_fname, 'w')
    fw.write('[ {} ]\n'.format(' '.join(numpy.asarray(bins, dtype=numpy.str))))
    fw.close()
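As a quick sanity check of the counting step: numpy.bincount([0, 2, 2], minlength=4) returns [1 0 2 0], so pdf IDs that never occur still get an explicit zero count, and the output file would read "[ 1 0 2 0 ]".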
Code Example #9
File: id2id.py Project: ducle90/chai_share
def main():
    desc = 'Convert from one mapping to another. Will output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('fname', help='File to process. We expect each line ' + \
                        'to have tokens separated by whitespace, where ' + \
                        'the first token is a key or name (e.g. utt name) ' + \
                        'that can be skipped, and the rest are values.')
    parser.add_argument('id_map', help='Mapping from one ID to another ID. ' + \
                        'Each line has two tokens separated by whitespace.')
    args = parser.parse_args()

    id_map = io.dict_read(args.id_map)
    io.log('Read {} mappings'.format(len(id_map)))

    with open(args.fname, 'r') as f:
        for line in f:
            ary = line.strip().split()
            for i in range(1, len(ary)):
                ary[i] = id_map[ary[i]]
            print ' '.join(ary)
Code Example #10
def main():
    desc = 'Convert from utt i-vectors to spk-ivectors. NOTE: this ' + \
           'script does not check the values of utt i-vectors that belong ' + \
           'to the same spk. It will simply treat the first utt i-vector ' + \
           'it finds from a spk as the i-vector for that spk. Output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('utt_ivectors', help='File containing utt i-vectors.')
    parser.add_argument('utt2spk', help='Kaldi utt2spk mapping.')
    args = parser.parse_args()

    utt_ivectors = ivector_ark_read(args.utt_ivectors, ordered=True)
    utt2spk = io.dict_read(args.utt2spk)

    processed_spks = set()
    for utt in utt_ivectors.keys():
        spk = utt2spk[utt]
        if spk in processed_spks:
            continue
        print_vector(spk, utt_ivectors[utt])
        processed_spks.add(spk)
    io.log('Wrote {} spk i-vectors'.format(len(processed_spks)))
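print_vector (also used in Example #7) presumably writes one vector per line in the same Kaldi text-ark style that ivector_ark_read consumes. A one-line sketch under that assumption, not the actual chai_share code:

def print_vector(key, vec):
    # Kaldi text-format vector: "key  [ v1 v2 ... ]" (assumed format)
    print('{}  [ {} ]'.format(key, ' '.join(str(v) for v in vec)))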
Code Example #11
def main():
    import argparse
    desc = 'Perform sanity check for i-vector grouping'
    parser = argparse.ArgumentParser(
            description=desc,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('ivector_ark', help='i-vectors ark file')
    parser.add_argument('ivec2group', help='i-vector to group mapping')
    parser.add_argument('--cov-type', default='diag',
                        help='GMM covariance type (full|tied|diag|spherical)')
    parser.add_argument('--withheld-frac', type=float,
                        help='Fraction of i-vectors retained from each group ' \
                                + 'for testing. If not set, use the same ' \
                                + 'data for training and testing.')
    args = parser.parse_args()

    io.log('Reading i-vector ark from {}'.format(args.ivector_ark))
    ivectors = ivector_ark_read(args.ivector_ark)
    io.log('Reading i-vector grouping from {}'.format(args.ivec2group))
    ivec2group = io.dict_read(args.ivec2group)
    group2ivecs = common.make_reverse_index(ivec2group)

    io.log('GMM covariance type: {}'.format(args.cov_type))
    io.log('Withheld fraction: {}'.format(args.withheld_frac))
    # Fit GMM and do prediction
    if args.withheld_frac is None:
        corr_lbls = map(lambda x: ivec2group[x], ivectors.keys())
        pred_lbls = run(ivectors, group2ivecs, ivectors.keys(), args.cov_type)
    else:
        corr_lbls = []
        pred_lbls = []
        for group in group2ivecs:
            # Common base for training i-vectors
            train_group2ivecs = OrderedDict()
            for other_group in group2ivecs:
                if other_group != group:
                    train_group2ivecs[other_group] = group2ivecs[other_group]
            # Get partitions of test i-vectors and step through each one
            test_partitions = partition(group2ivecs[group], args.withheld_frac)
            for i in range(len(test_partitions)):
                io.log('-- Partition {} / {} for {}'.format(
                    i + 1, len(test_partitions), group
                ))
                test_ivecs = test_partitions[i]
                # Get training i-vectors for this group
                train_ivecs = []
                for j in range(len(test_partitions)):
                    if j != i:
                        train_ivecs.extend(test_partitions[j])
                train_group2ivecs[group] = train_ivecs
                # Get results
                corr_lbls.extend(map(lambda x: ivec2group[x], test_ivecs))
                pred_lbls.extend(run(ivectors, train_group2ivecs,
                                     test_ivecs, args.cov_type))

    # Report results
    acc = 100 * accuracy_score(corr_lbls, pred_lbls)
    print 'Overall accuracy: {} (%)'.format(acc)
    group_accs, group_names = comp_UARs(corr_lbls, pred_lbls)
    print 'Mean per-group accuracy: {} (%)'.format(100 * np.mean(group_accs))
    print 'Individual group accuracies:'
    for group_acc, group_name in zip(group_accs, group_names):
        print '\t{} - {} (%)'.format(group_name, 100 * group_acc)
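The partition helper is not included in these excerpts. One plausible reading, given the --withheld-frac semantics and the round-robin train/test assembly above, is an even split into roughly 1/frac chunks; this is a guess rather than the actual code:

import math

def partition(items, frac):
    """Split items into ~1/frac chunks, each holding ~frac of the items (sketch)."""
    n_parts = max(1, int(round(1.0 / frac)))
    size = int(math.ceil(len(items) / float(n_parts)))
    return [items[i:i + size] for i in range(0, len(items), size)]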