def main():
    """Convert a phone-level alignment to a word-level alignment on stdout."""
    desc = 'Convert phone to word alignment. Output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('ali_phones_with_length',
                        help='File containing phone alignment with length ' + \
                             '(generated with ali-to-phones --write-lengths=true)')
    parser.add_argument('text', help='Kaldi word-level transcript')
    parser.add_argument('phone_map',
                        help='Mapping from text to phone ID. ' + \
                             'We expect each line to have two tokens separated ' + \
                             'by whitespace, where the first token is the phone ' + \
                             'and the second token is the ID number.')
    parser.add_argument('lexicon', help='Pronunciation lexicon')
    parser.add_argument('--sil-phones', nargs='+', default=[],
                        help='IDs of phones regarded as silence')
    parser.add_argument('--sil-label', default='sil',
                        help='Label of silence phone/word to use in output')
    args = parser.parse_args()

    alis = ali_with_length_read(args.ali_phones_with_length,
                                ordered=True, expand=False)
    io.log('Loaded {} alignments'.format(len(alis)))
    text = io.dict_read(args.text, lst=True)
    io.log('Loaded transcript containing {} utterances'.format(len(text)))
    phone2id = io.dict_read(args.phone_map)
    io.log('Loaded phone2id containing {} phones'.format(len(phone2id)))

    # Invert phone2id while normalizing the phone name so that IDs of phone
    # variants map to the primary phone (e.g. sil, sil_B, sil_E, sil_I, sil_S
    # all map to sil). Assumption: anything after and including the first '_'
    # is not part of the primary phone name.
    id2phone = {phone2id[p]: p.split('_')[0] for p in phone2id}
    io.log('Total phones in id2phone: {}'.format(len(set(id2phone.values()))))

    lexicon = io.lexicon_read(args.lexicon)
    io.log('Loaded lexicon containing {} words'.format(len(lexicon)))
    sil_phones = set(args.sil_phones)
    io.log('sil_phones: {} ({}), sil_label: {}'.format(
        sil_phones, [id2phone[i] for i in sil_phones], args.sil_label))

    for key in alis:
        phone_tokens, length = get_phone_tokens(alis[key], id2phone, sil_phones)
        # Skip utterances with nothing to align or no reference transcript.
        if not phone_tokens:
            io.log('WARNING: {} - no non-silence tokens'.format(key))
            continue
        if key not in text:
            io.log('WARNING: {} not in text'.format(key))
            continue
        phone2word_ali(key, phone_tokens, text[key], lexicon,
                       args.sil_label, length)
def main():
    """Leave-one-speaker-out sanity check for i-vector grouping.

    For each speaker, fits GMMs on every other speaker's i-vectors and
    predicts group labels for the held-out speaker, then reports results.
    """
    import argparse
    desc = 'Perform sanity check for i-vector grouping'
    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('ivector_ark', help='i-vectors ark file')
    parser.add_argument('ivec2group', help='i-vector to group mapping')
    parser.add_argument('ivec2spk', help='i-vector to speaker mapping')
    parser.add_argument('--cov-type', default='diag',
                        help='GMM covariance type (full|tied|diag|spherical)')
    args = parser.parse_args()

    io.log('Reading i-vector ark from {}'.format(args.ivector_ark))
    ivectors = ivector_ark_read(args.ivector_ark)
    io.log('Reading i-vector grouping from {}'.format(args.ivec2group))
    ivec2group = io.dict_read(args.ivec2group)
    group2ivecs = common.make_reverse_index(ivec2group)
    io.log('Reading i-vector to speaker mapping from {}'.format(args.ivec2spk))
    ivec2spk = io.dict_read(args.ivec2spk)
    spk2ivecs = common.make_reverse_index(ivec2spk)
    io.log('GMM covariance type: {}'.format(args.cov_type))

    spks, corr_lbls, pred_lbls = [], [], []
    for i, spk in enumerate(spk2ivecs):
        io.log('--- Held-out spk: {} ({} / {}) ---'.format(
            spk, i + 1, len(spk2ivecs)))
        # Training index built from every speaker except the held-out one.
        train_group2ivecs = OrderedDict()
        for other_spk in spk2ivecs:
            if other_spk == spk:
                continue
            for ivec in spk2ivecs[other_spk]:
                group = ivec2group[ivec]
                train_group2ivecs.setdefault(group, []).append(ivec)
        test_ivecs = spk2ivecs[spk]
        try:
            preds = run(ivectors, train_group2ivecs, test_ivecs, args.cov_type)
            spks.extend([spk] * len(test_ivecs))
            corr_lbls.extend([ivec2group[x] for x in test_ivecs])
            pred_lbls.extend(preds)
        except RuntimeError:
            # Best-effort: log the failure and move on to the next speaker.
            traceback.print_exc()
            io.log('...skipping {}'.format(spk))
    report(corr_lbls, pred_lbls, spks)
def main(): desc = 'Kaldi outputs token IDs in numbers. We can map them back to ' + \ 'textual form given an ID to text mapping. Will output to stdout.' parser = common.init_argparse(desc) parser.add_argument('fname', help='File to process. We expect each line ' + \ 'to have tokens separated by whitespace, where ' + \ 'the first token is a key or name (e.g. utt name) ' + \ 'that can be skipped, and the rest are ID numbers.') parser.add_argument('id_map', help='Mapping from textual form to ID. ' + \ 'We expect each line to have two tokens separated ' + \ 'by whitespace, where the first token is the text ' + \ 'and the second token is the ID number.') args = parser.parse_args() id_map = common.make_reverse_index(io.dict_read(args.id_map)) # Check that mapping from number to text is 1-to-1 for k in id_map.keys(): if len(id_map[k]) != 1: raise ValueError('Mapping at {} not 1-1: {}'.format(k, id_map[k])) id_map[k] = id_map[k][0] with open(args.fname, 'r') as f: for line in f: ary = line.strip().split() for i in range(1, len(ary)): ary[i] = id_map[ary[i]] print ' '.join(ary)
def main(): import argparse desc = 'Perform sanity check for i-vector grouping' parser = argparse.ArgumentParser( description=desc, formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('ivector_ark', help='i-vectors ark file') parser.add_argument('ivec2group', help='i-vector to group mapping') parser.add_argument('train_keys', help='Keys for training set') parser.add_argument('--cov-type', default='diag', help='GMM covariance type (full|tied|diag|spherical)') parser.add_argument('--ignore', nargs='+', default=[], help='Ignore these groups') parser.add_argument('--test-keys', help='Keys for test set. If not ' + \ 'specified, treat keys not in training set as test.') args = parser.parse_args() io.log('Reading i-vector ark from {}'.format(args.ivector_ark)) ivectors = ivector_ark_read(args.ivector_ark) io.log('Reading i-vector grouping from {}'.format(args.ivec2group)) ivec2group = io.dict_read(args.ivec2group) io.log('Ignore list: {}'.format(args.ignore)) all_keys = [x for x in ivec2group if ivec2group[x] not in args.ignore] io.log('Reading training keys from {}'.format(args.train_keys)) train_keys = set(io.read_lines(args.train_keys)) test_keys = None if args.test_keys is not None: io.log('Reading test keys from {}'.format(args.test_keys)) test_keys = set(io.read_lines(args.test_keys)) train_ivec2group, test_ivec2group = OrderedDict(), OrderedDict() for k in all_keys: if k in ivectors: if k in train_keys: train_ivec2group[k] = ivec2group[k] elif test_keys is None or k in test_keys: test_ivec2group[k] = ivec2group[k] test_keys = test_ivec2group.keys() io.log('Train: {}, Test: {}'.format(len(train_keys), len(test_keys))) train_group2ivecs = common.make_reverse_index(train_ivec2group) io.log('GMM covariance type: {}'.format(args.cov_type)) # Fit GMM and do prediction corr_lbls = map(lambda x: test_ivec2group[x], test_keys) pred_lbls = run(ivectors, train_group2ivecs, test_keys, args.cov_type) # Report results acc = 100 * accuracy_score(corr_lbls, 
pred_lbls) print 'Overall accuracy: {:.2f} (%)'.format(acc) group_accs, group_names = comp_UARs(corr_lbls, pred_lbls) print 'Mean per-group accuracy: {:.2f} (%)'.format(100 * np.mean(group_accs)) print 'Individual group accuracies:' for group_acc, group_name in zip(group_accs, group_names): print '\t{} - {} (%)'.format(group_name, 100 * group_acc)
def main(args): ds = TemporalData.from_kaldi(args.scp) io.log('Loaded dataset containing {} utts'.format(len(ds.get_utt_names()))) utt2label = io.dict_read(args.utt2label) io.log('Loaded utt2label containing {} entries'.format(len(utt2label))) for utt_name in ds.get_utt_names(): if utt_name not in utt2label: io.log('WARNING: {} not in utt2label, skipping'.format(utt_name)) lbl = utt2label[utt_name] dur = ds.get_num_frames_by_utt_name(utt_name) print '{} {}'.format(utt_name, ' '.join([lbl] * dur))
def _init_records(self):
    """Set up result caching for parameter combinations.

    Loads existing results from disk when a summary file is present.
    Each line looks like:
        <model_name> <train_err> <valid_err> [<valid_err> ...]
    """
    fname = os.path.join(self.args.output_dir, 'summary.txt')
    self.records_fname = fname
    if os.path.exists(fname):
        io.log('Loading existing records from {}'.format(fname))
        self.records = io.dict_read(fname, ordered=True, lst=True, fn=float)
    else:
        # No prior results on disk; start with an empty (ordered) cache.
        self.records = OrderedDict()
def main():
    """Expand speaker-level i-vectors to utterance level; print to stdout."""
    desc = 'Convert from speaker i-vectors to utt-ivectors. Output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('spk_ivectors', help='File containing spk i-vectors.')
    parser.add_argument('utt2spk', help='Kaldi utt2spk mapping.')
    args = parser.parse_args()

    spk_ivectors = ivector_ark_read(args.spk_ivectors)
    utt2spk = io.dict_read(args.utt2spk, ordered=True)
    spk2utt = common.make_reverse_index(utt2spk, ordered=True)

    wrote = 0
    # Every utterance of a speaker gets a copy of that speaker's i-vector.
    for spk, utts in spk2utt.items():
        for utt in utts:
            print_vector(utt, spk_ivectors[spk])
        wrote += len(utts)
    io.log('Wrote {} utt i-vectors for {} spks'.format(wrote, len(spk2utt)))
def main():
    """Read a pdf alignment and write prior counts to disk.

    Output format: a single line '[ c0 c1 ... cN ]' where ci is the number
    of frames aligned to pdf i.
    """
    desc = 'Reads in a pdf alignment and output prior counts to disk.'
    parser = common.init_argparse(desc)
    parser.add_argument('alipdf', help='pdf alignment file.')
    parser.add_argument('output_fname', help='File to output prior counts to')
    parser.add_argument('--num-pdfs', type=int, help='Number of pdfs. ' + \
                        'If not set, use max value in `alipdf`.')
    args = parser.parse_args()

    alipdf = io.dict_read(args.alipdf)
    pdfs = []
    for utt in alipdf:
        # FIX: the numpy.int alias was deprecated in NumPy 1.20 and removed
        # in 1.24; the builtin int is the supported spelling.
        pdfs.extend(numpy.asarray(alipdf[utt], dtype=int))
    # FIX: bincount requires a non-negative integer minlength; passing None
    # (when --num-pdfs is unset) is rejected. 0 reproduces the documented
    # fallback of sizing by the max value in `alipdf`.
    minlength = args.num_pdfs if args.num_pdfs is not None else 0
    bins = numpy.bincount(pdfs, minlength=minlength)
    # FIX: use a context manager so the file is closed even if the write
    # fails; str() replaces the removed numpy.str alias.
    with open(args.output_fname, 'w') as fw:
        fw.write('[ {} ]\n'.format(' '.join(str(b) for b in bins)))
def main(): desc = 'Convert from one mapping to another. Will output to stdout.' parser = common.init_argparse(desc) parser.add_argument('fname', help='File to process. We expect each line ' + \ 'to have tokens separated by whitespace, where ' + \ 'the first token is a key or name (e.g. utt name) ' + \ 'that can be skipped, and the rest are values.') parser.add_argument('id_map', help='Mapping from one ID to another ID. ' + \ 'Each line has two tokens separated by whitespace.') args = parser.parse_args() id_map = io.dict_read(args.id_map) io.log('Read {} mappings'.format(len(id_map))) with open(args.fname, 'r') as f: for line in f: ary = line.strip().split() for i in range(1, len(ary)): ary[i] = id_map[ary[i]] print ' '.join(ary)
def main():
    """Emit one i-vector per speaker: the first utt i-vector seen for that spk."""
    desc = 'Convert from utt i-vectors to spk-ivectors. NOTE: this ' + \
           'script does not check the values of utt i-vectors that belong ' + \
           'to the same spk. It will simply treat the first utt i-vector ' + \
           'it finds from a spk as the i-vector for that spk. Output to stdout.'
    parser = common.init_argparse(desc)
    parser.add_argument('utt_ivectors', help='File containing utt i-vectors.')
    parser.add_argument('utt2spk', help='Kaldi utt2spk mapping.')
    args = parser.parse_args()

    utt_ivectors = ivector_ark_read(args.utt_ivectors, ordered=True)
    utt2spk = io.dict_read(args.utt2spk)

    seen = set()
    for utt, ivec in utt_ivectors.items():
        spk = utt2spk[utt]
        if spk not in seen:
            # First utterance wins; later utterances of this spk are ignored.
            print_vector(spk, ivec)
            seen.add(spk)
    io.log('Wrote {} spk i-vectors'.format(len(seen)))
def main():
    """Sanity-check i-vector grouping by fitting per-group GMMs and
    classifying i-vectors.

    Without --withheld-frac, trains and tests on the same data. With it,
    each group's i-vectors are split into partitions (via the project
    `partition` helper — presumably round-robin/fractional chunks; confirm
    against its definition) and each partition is scored while the rest of
    the group trains.
    """
    import argparse
    desc = 'Perform sanity check for i-vector grouping'
    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('ivector_ark', help='i-vectors ark file')
    parser.add_argument('ivec2group', help='i-vector to group mapping')
    parser.add_argument('--cov-type', default='diag',
                        help='GMM covariance type (full|tied|diag|spherical)')
    parser.add_argument('--withheld-frac', type=float,
                        help='Fraction of i-vectors retained from each group ' \
                        + 'for testing. If not set, use the same ' \
                        + 'data for training and testing.')
    args = parser.parse_args()
    io.log('Reading i-vector ark from {}'.format(args.ivector_ark))
    ivectors = ivector_ark_read(args.ivector_ark)
    io.log('Reading i-vector grouping from {}'.format(args.ivec2group))
    ivec2group = io.dict_read(args.ivec2group)
    group2ivecs = common.make_reverse_index(ivec2group)
    io.log('GMM covariance type: {}'.format(args.cov_type))
    io.log('Withheld fraction: {}'.format(args.withheld_frac))
    # Fit GMM and do prediction
    if args.withheld_frac is None:
        # No held-out data: score every i-vector against models trained on
        # the full grouping.
        corr_lbls = map(lambda x: ivec2group[x], ivectors.keys())
        pred_lbls = run(ivectors, group2ivecs, ivectors.keys(), args.cov_type)
    else:
        corr_lbls = []
        pred_lbls = []
        for group in group2ivecs:
            # Common base for training i-vectors: all other groups' data is
            # reused unchanged across this group's partitions.
            train_group2ivecs = OrderedDict()
            for other_group in group2ivecs:
                if other_group != group:
                    train_group2ivecs[other_group] = group2ivecs[other_group]
            # Get partitions of test i-vectors and step through each one
            test_partitions = partition(group2ivecs[group], args.withheld_frac)
            for i in range(len(test_partitions)):
                io.log('-- Partition {} / {} for {}'.format(
                    i + 1, len(test_partitions), group
                ))
                test_ivecs = test_partitions[i]
                # Get training i-vectors for this group: every partition
                # except the one currently held out for testing.
                train_ivecs = []
                for j in range(len(test_partitions)):
                    if j != i:
                        train_ivecs.extend(test_partitions[j])
                # Overwrites the held-out group's entry each iteration; the
                # other groups' entries persist from the loop above.
                train_group2ivecs[group] = train_ivecs
                # Get results
                corr_lbls.extend(map(lambda x: ivec2group[x], test_ivecs))
                pred_lbls.extend(run(ivectors, train_group2ivecs,
                                     test_ivecs, args.cov_type))
    # Report results
    acc = 100 * accuracy_score(corr_lbls, pred_lbls)
    print 'Overall accuracy: {} (%)'.format(acc)
    group_accs, group_names = comp_UARs(corr_lbls, pred_lbls)
    print 'Mean per-group accuracy: {} (%)'.format(100 * np.mean(group_accs))
    print 'Individual group accuracies:'
    for group_acc, group_name in zip(group_accs, group_names):
        print '\t{} - {} (%)'.format(group_name, 100 * group_acc)