def score(fn_reftext, fn_testtext, outfile=None):
    """Score a transcription file against a reference and write a summary table.

    Both files are loaded with ``load_wavaskey`` as ``Utterance`` objects and
    compared with ``score_file``; the figures it returns are formatted into a
    one-row table that is written to *outfile*.

    :param fn_reftext: path of the file with the reference transcriptions
    :param fn_testtext: path of the file with the transcriptions to be scored
    :param outfile: writable file-like object receiving the report; ``None``
        (the default) selects ``sys.stdout`` as it is bound at call time
    :return: None
    """
    # Resolve the default lazily: a ``sys.stdout`` default in the signature is
    # evaluated once at definition time and misses later redirections.
    if outfile is None:
        outfile = sys.stdout

    reftext = load_wavaskey(fn_reftext, Utterance)
    testtext = load_wavaskey(fn_testtext, Utterance)

    corr, sub, dels, ins, wer, nwords = score_file(reftext, testtext)

    # The header cells are padded to the widths of the format fields below
    # them so that the columns line up.
    m = """
    Please note that the scoring is implicitly ignoring all non-speech events.
    Ref: {r}
    Tst: {t}
    |==============================================================================================|
    |            | # Sentences  |  # Words  |   Corr   |   Sub    |   Del    |   Ins    |   Err    |
    |----------------------------------------------------------------------------------------------|
    | Sum/Avg    |{num_sents:^14}|{num_words:^11.0f}|{corr:^10.2f}|{sub:^10.2f}|{dels:^10.2f}|{ins:^10.2f}|{wer:^10.2f}|
    |==============================================================================================|
    """.format(
        r=fn_reftext,
        t=fn_testtext,
        num_sents=len(reftext),
        num_words=nwords,
        corr=corr,
        sub=sub,
        dels=dels,
        ins=ins,
        wer=wer,
    )

    outfile.write(m)
    outfile.write("\n")
def score(fn_reftext, fn_testtext, outfile=None):
    """Compare test transcriptions with reference ones and report the result.

    The reference and the test file are both read by ``load_wavaskey`` (as
    ``Utterance`` objects) and handed to ``score_file``.  The six values it
    returns are rendered as a small table which is written to *outfile*,
    followed by a newline.

    :param fn_reftext: file name of the reference transcriptions
    :param fn_testtext: file name of the transcriptions under test
    :param outfile: object with a ``write`` method; when left as ``None`` the
        current ``sys.stdout`` is used
    :return: None
    """
    # Looking up sys.stdout here, not in the signature, honours any
    # redirection of standard output done after this module was imported.
    out = sys.stdout if outfile is None else outfile

    reftext = load_wavaskey(fn_reftext, Utterance)
    testtext = load_wavaskey(fn_testtext, Utterance)
    corr, sub, dels, ins, wer, nwords = score_file(reftext, testtext)

    values = dict(r=fn_reftext, t=fn_testtext,
                  num_sents=len(reftext), num_words=nwords,
                  corr=corr, sub=sub, dels=dels, ins=ins, wer=wer)

    # Header cells carry the same widths as the format fields under them.
    template = """
    Please note that the scoring is implicitly ignoring all non-speech events.
    Ref: {r}
    Tst: {t}
    |==============================================================================================|
    |            | # Sentences  |  # Words  |   Corr   |   Sub    |   Del    |   Ins    |   Err    |
    |----------------------------------------------------------------------------------------------|
    | Sum/Avg    |{num_sents:^14}|{num_words:^11.0f}|{corr:^10.2f}|{sub:^10.2f}|{dels:^10.2f}|{ins:^10.2f}|{wer:^10.2f}|
    |==============================================================================================|
    """

    out.write(template.format(**values))
    out.write("\n")
def hdc_slu(fn_input, constructor, fn_output): """ Use for transcription a HDC SLU model. :param fn_model: :param fn_input: :param constructor: :param fn_reference: :return: """ print "="*120 print "HDC SLU: ", fn_input, fn_output print "-"*120 from alex.components.slu.base import CategoryLabelDatabase from alex.applications.PublicTransportInfoCS.preprocessing import PTICSSLUPreprocessing from alex.applications.PublicTransportInfoCS.hdc_slu import PTICSHDCSLU from alex.corpustools.wavaskey import load_wavaskey, save_wavaskey from alex.corpustools.semscore import score cldb = CategoryLabelDatabase('../data/database.py') preprocessing = PTICSSLUPreprocessing(cldb) hdc_slu = PTICSHDCSLU(preprocessing, cfg = {'SLU': {PTICSHDCSLU: {'utt2da': as_project_path("applications/PublicTransportInfoCS/data/utt2da_dict.txt")}}}) test_utterances = load_wavaskey(fn_input, constructor, limit=1000000) parsed_das = {} for utt_key, utt in sorted(test_utterances.iteritems()): if isinstance(utt, Utterance): obs = {'utt': utt} elif isinstance(utt, UtteranceNBList): obs = {'utt_nbl': utt} else: raise BaseException('Unsupported observation type') print '-' * 120 print "Observation:" print utt_key, " ==> " print unicode(utt) da_confnet = hdc_slu.parse(obs, verbose=False) print "Conf net:" print unicode(da_confnet) da_confnet.prune() dah = da_confnet.get_best_da_hyp() print "1 best: " print unicode(dah) parsed_das[utt_key] = dah.da if 'CL_' in str(dah.da): print '*' * 120 print utt print dah.da hdc_slu.parse(obs, verbose=True) save_wavaskey(fn_output, parsed_das, trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))
def main(): files = [] for i in range(1, len(sys.argv)): k = load_wavaskey(sys.argv[i], unicode) print sys.argv[i], len(k) files.append(k) keys = set() keys.update(set(files[0].keys())) ukeys = set() for f in files: keys = keys.intersection(set(f.keys())) ukeys = ukeys.union(set(f.keys())) print len(keys), len(ukeys), len(ukeys - keys) for f in files: rk = set(f.keys()) - keys for k in rk: if k in f: del f[k] for i in range(1, len(sys.argv)): save_wavaskey(sys.argv[i] + '.pruned', files[i - 1])
def main(): import autopath files = [] for i in range(1, len(sys.argv)): k = load_wavaskey(sys.argv[i], unicode) print sys.argv[i], len(k) files.append(k) keys = set() keys.update(set(files[0].keys())) ukeys = set() for f in files: keys = keys.intersection(set(f.keys())) ukeys = ukeys.union(set(f.keys())) print len(keys), len(ukeys), len(ukeys - keys) for f in files: rk = set(f.keys()) - keys for k in rk: if k in f: del f[k] for i in range(1, len(sys.argv)): save_wavaskey(sys.argv[i]+'.pruned',files[i-1])
def decode_with_reference(reference, outdir, cfg):
    """Decode every recording listed in a reference file and compute statistics.

    Args:
        reference(str): Path to the reference file; it is read with
            ``load_wavaskey`` into ``Utterance`` objects.
        outdir(str): Directory handed to ``compute_rt_factor`` and
            ``compute_save_stat``.
        cfg: Configuration passed to ``asr_factory`` and ``decode_info``.
    """
    asr = asr_factory(cfg)
    references = load_wavaskey(reference, Utterance)

    hypotheses = {}
    wav_durations = {}
    decode_durations = {}
    forward_durations = {}

    for wav_path, ref_utt in references.iteritems():
        # decode_info returns (best, decoding time, forward time, wav length)
        decoded = decode_info(asr, cfg, wav_path, ref_utt)
        (hypotheses[wav_path],
         decode_durations[wav_path],
         forward_durations[wav_path],
         wav_durations[wav_path]) = decoded

    stat_args = (outdir, references, hypotheses,
                 wav_durations, decode_durations, forward_durations)
    compute_rt_factor(*stat_args)
    compute_save_stat(*stat_args)
def train(fn_model, fn_transcription, constructor, fn_annotation, fn_bs_transcription,
          fn_bs_annotation, min_pos_feature_count, min_neg_feature_count,
          min_classifier_count, limit=100000):
    """Train a SLU DAILogRegClassifier model and save it.

    :param fn_model: file the trained model is saved to
    :param fn_transcription: file with the training observations
    :param constructor: constructor used by ``load_wavaskey`` for the
        observations in *fn_transcription*
    :param fn_annotation: file with the dialogue acts for *fn_transcription*
    :param fn_bs_transcription: file with additional utterances that are
        passed through ``increase_weight`` before being merged in
    :param fn_bs_annotation: file with the dialogue acts for
        *fn_bs_transcription*
    :param min_pos_feature_count: passed to ``gen_classifiers_data``; also
        determines the weight given to the additional data
    :param min_neg_feature_count: passed to ``gen_classifiers_data``
    :param min_classifier_count: passed to ``prune_classifiers``
    :param limit: limit passed to every ``load_wavaskey`` call
    :return: None
    """
    bs_weight = min_pos_feature_count + 10

    bs_utterances = load_wavaskey(fn_bs_transcription, Utterance, limit=limit)
    increase_weight(bs_utterances, bs_weight)
    bs_das = load_wavaskey(fn_bs_annotation, DialogueAct, limit=limit)
    increase_weight(bs_das, bs_weight)

    utterances = load_wavaskey(fn_transcription, constructor, limit=limit)
    das = load_wavaskey(fn_annotation, DialogueAct, limit=limit)

    # Merge the additional data into the regular training data.
    utterances.update(bs_utterances)
    das.update(bs_das)

    cldb = CategoryLabelDatabase('../../data/database.py')
    slu = DAILogRegClassifier(cldb, PTICSSLUPreprocessing(cldb), features_size=4)

    slu.extract_classifiers(das, utterances, verbose=True)
    slu.prune_classifiers(min_classifier_count=min_classifier_count)
    slu.print_classifiers()
    slu.gen_classifiers_data(
        min_pos_feature_count=min_pos_feature_count,
        min_neg_feature_count=min_neg_feature_count,
        verbose2=True,
    )

    slu.train(inverse_regularisation=1e1, verbose=True)
    slu.save_model(fn_model)
def train(fn_model, fn_transcription, constructor, fn_annotation, fn_bs_transcription,
          fn_bs_annotation, min_pos_feature_count, min_neg_feature_count,
          min_classifier_count, limit=100000):
    """Fit a DAILogRegClassifier SLU model on annotated data and store it.

    Two data sets are loaded: the regular one (*fn_transcription* with
    *fn_annotation*) and an additional one (*fn_bs_transcription* with
    *fn_bs_annotation*).  The additional one goes through ``increase_weight``
    and is then merged into the regular one before the classifiers are
    extracted, pruned, trained and saved.

    :param fn_model: output file for the trained model
    :param fn_transcription: file with the observations to train on
    :param constructor: constructor given to ``load_wavaskey`` for
        *fn_transcription*
    :param fn_annotation: file with the matching dialogue acts
    :param fn_bs_transcription: file with the additional utterances
    :param fn_bs_annotation: file with the additional dialogue acts
    :param min_pos_feature_count: forwarded to ``gen_classifiers_data``; the
        additional data is weighted with this value plus 10
    :param min_neg_feature_count: forwarded to ``gen_classifiers_data``
    :param min_classifier_count: forwarded to ``prune_classifiers``
    :param limit: forwarded to every ``load_wavaskey`` call
    :return: None
    """
    def load(fn, cons):
        # All four inputs are read with the same limit.
        return load_wavaskey(fn, cons, limit=limit)

    extra_weight = min_pos_feature_count + 10

    bs_utterances = load(fn_bs_transcription, Utterance)
    increase_weight(bs_utterances, extra_weight)

    bs_das = load(fn_bs_annotation, DialogueAct)
    increase_weight(bs_das, extra_weight)

    utterances = load(fn_transcription, constructor)
    das = load(fn_annotation, DialogueAct)

    utterances.update(bs_utterances)
    das.update(bs_das)

    cldb = CategoryLabelDatabase('../../data/database.py')
    preprocessing = PTICSSLUPreprocessing(cldb)
    slu = DAILogRegClassifier(cldb, preprocessing, features_size=4)

    slu.extract_classifiers(das, utterances, verbose=True)
    slu.prune_classifiers(min_classifier_count=min_classifier_count)
    slu.print_classifiers()

    feature_limits = {
        'min_pos_feature_count': min_pos_feature_count,
        'min_neg_feature_count': min_neg_feature_count,
    }
    slu.gen_classifiers_data(verbose2=True, **feature_limits)

    slu.train(inverse_regularisation=1e1, verbose=True)
    slu.save_model(fn_model)
def decode_with_reference(reference, outdir, cfg):
    """Run the decoder over all recordings of a reference file.

    Args:
        reference(str): Path to the reference file, loaded with
            ``load_wavaskey`` as ``Utterance`` objects.
        outdir(str): Directory passed on to the statistics functions.
        cfg: Configuration given to ``asr_factory`` and ``decode_info``.
    """
    asr = asr_factory(cfg)
    trn_dict = load_wavaskey(reference, Utterance)

    dec_dict = {}
    wavlen_dict = {}
    declen_dict = {}
    fwlen_dict = {}

    for wav_path, ref in trn_dict.iteritems():
        outcome = decode_info(asr, cfg, wav_path, ref)
        best, dec_dur, fw_dur, wav_dur = outcome

        dec_dict[wav_path] = best
        declen_dict[wav_path] = dec_dur
        fwlen_dict[wav_path] = fw_dur
        wavlen_dict[wav_path] = wav_dur

    # Both reports are computed from the same collected data.
    for report in (compute_rt_factor, compute_save_stat):
        report(outdir, trn_dict, dec_dict, wavlen_dict, declen_dict, fwlen_dict)
def main():
    """Open a web search for every transcription in ``all.trn``, in random order.

    For each utterance an ``opera`` process is started on a Google query URL
    built from the lower-cased transcription, left running for a random
    10 to 200 seconds, and then killed.
    """
    utterances = load_wavaskey("all.trn", unicode, limit=100000)

    keys = list(utterances.keys())
    random.seed()
    random.shuffle(keys)

    for k in keys:
        # NOTE(review): '_' presumably marks a non-speech token -- confirm.
        if '_' in utterances[k]:
            continue

        url = 'www.google.cz/#q=' + urllib.quote_plus(utterances[k].lower().encode('utf8'))

        browser = subprocess.Popen(['opera', '-nosession', '-nomail', '-noraise',
                                    '-geometry', '500x100+0+0', url])

        time.sleep(random.randint(10, 200))

        # Kill the child through its Popen handle rather than shelling out to
        # ``kill -9``, and wait for it so that no zombie process is left.
        browser.kill()
        browser.wait()
def decode_with_reference(reference, outdir, num_workers):
    """Decode every recording of a reference file, optionally in parallel.

    Args:
        reference(str): Path to the reference file; it is read with
            ``load_wavaskey`` into ``Utterance`` objects.
        outdir(str): Directory passed to ``decode_info`` and to
            ``compute_save_stat``.
        num_workers(int): Number of worker processes.  With a value above 1
            the recordings are decoded in a ``multiprocessing.Pool``;
            otherwise they are decoded one by one in this process.
    """
    trn_dict = load_wavaskey(reference, Utterance)

    params = [(outdir, wav_path, ref) for wav_path, ref in trn_dict.items()]
    random.shuffle(params)

    if num_workers > 1:
        pool = multiprocessing.Pool(num_workers)
        try:
            decoded_wavs = pool.map(decode_info, params, 100)
        finally:
            # Shut the workers down even when decoding fails.
            pool.close()
            pool.join()
    else:
        decoded_wavs = [decode_info(p) for p in params]

    declen_dict, fwlen_dict, wavlen_dict, dec_dict = {}, {}, {}, {}
    for best, dec_dur, fw_dur, wav_dur, wav_path in decoded_wavs:
        dec_dict[wav_path] = best
        wavlen_dict[wav_path] = wav_dur
        declen_dict[wav_path] = dec_dur
        fwlen_dict[wav_path] = fw_dur

    compute_save_stat(outdir, trn_dict, dec_dict, wavlen_dict, declen_dict, fwlen_dict)
def decode_with_reference(reference, outdir, num_workers):
    """Launch the decoding of all recordings listed in a reference file.

    Args:
        reference(str): Path to the reference file, loaded with
            ``load_wavaskey`` as ``Utterance`` objects.
        outdir(str): Directory forwarded to ``decode_info`` and
            ``compute_save_stat``.
        num_workers(int): When greater than 1, that many processes decode the
            recordings through a ``multiprocessing.Pool``; otherwise the
            decoding runs sequentially in the calling process.
    """
    trn_dict = load_wavaskey(reference, Utterance)

    # One (outdir, wav_path, reference) tuple per recording, in random order.
    params = [(outdir, wav_path, ref_utt)
              for wav_path, ref_utt in trn_dict.items()]
    random.shuffle(params)

    if num_workers > 1:
        workers = multiprocessing.Pool(num_workers)
        try:
            decoded_wavs = workers.map(decode_info, params, 100)
        finally:
            # The pool used to be left open; close it and wait for the
            # worker processes to exit.
            workers.close()
            workers.join()
    else:
        decoded_wavs = [decode_info(item) for item in params]

    dec_dict = {}
    wavlen_dict = {}
    declen_dict = {}
    fwlen_dict = {}
    for best, dec_dur, fw_dur, wav_dur, wav_path in decoded_wavs:
        dec_dict[wav_path] = best
        wavlen_dict[wav_path] = wav_dur
        declen_dict[wav_path] = dec_dur
        fwlen_dict[wav_path] = fw_dur

    compute_save_stat(outdir, trn_dict, dec_dict, wavlen_dict, declen_dict, fwlen_dict)
def main():
    """Query a web search engine with the transcriptions stored in ``all.trn``.

    The transcriptions are visited in random order.  For each one an ``opera``
    browser is started on a Google query URL made from the lower-cased text;
    after a random pause of 10 to 200 seconds the browser is killed again.
    """
    utterances = load_wavaskey("all.trn", unicode, limit=100000)

    keys = list(utterances.keys())
    random.seed()
    random.shuffle(keys)

    opera = ['opera', '-nosession', '-nomail', '-noraise',
             '-geometry', '500x100+0+0']

    for k in keys:
        text = utterances[k]
        # NOTE(review): entries containing '_' are skipped; presumably they
        # hold non-speech tokens -- confirm.
        if '_' in text:
            continue

        query = urllib.quote_plus(text.lower().encode('utf8'))
        url = 'www.google.cz/#q=' + query

        browser = subprocess.Popen(opera + [url])
        time.sleep(random.randint(10, 200))

        # Popen.kill() replaces the former ``kill -9`` shell command, and
        # wait() collects the terminated child.
        browser.kill()
        browser.wait()
def load_das(das_fname, limit=None, encoding='UTF-8'):
    """
    Reads dialogue acts from a file into a dictionary.

    Each line of the file is expected to look like

        [[:space:]..]<key>[[:space:]..]=>[[:space:]..]<DA>[[:space:]..]

    or, when the file has no keys, like

        [[:space:]..]<DA>[[:space:]..]

    Arguments:
        das_fname -- name of the file holding the DAs
        limit -- maximum number of DAs to read
        encoding -- encoding of the file

    Returns a dictionary whose values are DialogueAct instances.

    """
    das = load_wavaskey(das_fname, DialogueAct, limit, encoding)
    return das
def decode_with_reference(reference, outdir, cfg):
    """Decode the recordings of a reference file in sorted order.

    Args:
        reference(str): Path to the reference file; it is loaded with
            ``load_wavaskey`` into ``Utterance`` objects.
        outdir(str): Directory passed to ``decode_info`` and to the two
            statistics functions.
        cfg: Configuration passed to ``asr_factory`` and ``decode_info``.
    """
    asr = asr_factory(cfg)
    trn_dict = load_wavaskey(reference, Utterance)

    dec_dict = {}
    wavlen_dict = {}
    declen_dict = {}
    fwlen_dict = {}

    for wav_path, ref_utt in sorted(trn_dict.items()):
        decoded = decode_info(asr, cfg, outdir, wav_path, ref_utt)
        (dec_dict[wav_path],
         declen_dict[wav_path],
         fwlen_dict[wav_path],
         wavlen_dict[wav_path]) = decoded

    collected = (trn_dict, dec_dict, wavlen_dict, declen_dict, fwlen_dict)
    compute_rt_factor(outdir, *collected)
    compute_save_stat(outdir, *collected)
cal_list = [ x[1] for x in pri_cal_list] s = time.time() for wpm in wp_2_match: i, f = find_approx(wpm[0]) e = time.time() print "size {size} elapsed {time}".format(size=len(wp_2_match), time = e - s) print "="*120 print "The calibration table: insert it in the config" print "-"*120 print repr(cal_list) if __name__ == '__main__': reference = 'decoded_kaldi/all_trn.txt' trn_dict = load_wavaskey(reference, Utterance) trn_dict = basename_dict(trn_dict) fst_dir = 'decoded_kaldi' fst_fns = sorted(glob.glob(os.path.join(fst_dir, '*.fst'))) words = load_words('models/words.txt') wp_2_match = [] for i, fn in enumerate(fst_fns): print '='*120 print i, fn ref = trn_dict[os.path.basename(fn).replace('fst','wav')] print unicode(ref) print '-'*120
def hdc_slu_test(fn_input, constructor, fn_reference): """ Tests the HDC SLU. :param fn_model: :param fn_input: :param constructor: :param fn_reference: :return: """ print "=" * 120 print "Testing HDC SLU: ", fn_input, fn_reference print "-" * 120 from alex.components.slu.base import CategoryLabelDatabase from alex.applications.PublicTransportInfoCS.preprocessing import PTICSSLUPreprocessing from alex.applications.PublicTransportInfoCS.hdc_slu import PTICSHDCSLU from alex.corpustools.wavaskey import load_wavaskey, save_wavaskey from alex.corpustools.semscore import score cldb = CategoryLabelDatabase('../data/database.py') preprocessing = PTICSSLUPreprocessing(cldb) hdc_slu = PTICSHDCSLU( preprocessing, cfg={ 'SLU': { PTICSHDCSLU: { 'utt2da': as_project_path( "applications/PublicTransportInfoCS/data/utt2da_dict.txt" ) } } }) test_utterances = load_wavaskey(fn_input, constructor, limit=100000) parsed_das = {} for utt_key, utt in sorted(test_utterances.iteritems()): if isinstance(utt, Utterance): obs = {'utt': utt} elif isinstance(utt, UtteranceNBList): obs = {'utt_nbl': utt} else: raise BaseException('Unsupported observation type') print '-' * 120 print "Observation:" print utt_key, " ==> " print unicode(utt) da_confnet = hdc_slu.parse(obs, verbose=False) print "Conf net:" print unicode(da_confnet) da_confnet.prune() dah = da_confnet.get_best_da_hyp() print "1 best: " print unicode(dah) parsed_das[utt_key] = dah.da if 'CL_' in str(dah.da): print '*' * 120 print utt print dah.da hdc_slu.parse(obs, verbose=True) fn_sem = os.path.basename(fn_input) + '.hdc.sem.out' save_wavaskey(fn_sem, parsed_das, trans=lambda da: '&'.join(sorted(unicode(da).split('&')))) f = codecs.open(os.path.basename(fn_sem) + '.score', 'w+', encoding='UTF-8') score(fn_reference, fn_sem, True, True, f) f.close()
def load_das(das_fname, limit=None, encoding='UTF-8'):
    """
    Loads the content of a file with ``load_wavaskey``, using
    CUEDDialogueAct as the constructor.

    Arguments:
        das_fname -- name of the file to read
        limit -- passed on to ``load_wavaskey`` as its limit
        encoding -- passed on to ``load_wavaskey`` as its encoding

    Returns whatever ``load_wavaskey`` returns.

    """
    das = load_wavaskey(das_fname, CUEDDialogueAct, limit, encoding)
    return das
def trained_slu_test(fn_model, fn_input, constructor, fn_reference): """ Tests a SLU DAILogRegClassifier model. :param fn_model: :param fn_input: :param constructor: :param fn_reference: :return: """ print "="*120 print "Testing: ", fn_model, fn_input, fn_reference print "-"*120 from alex.applications.PublicTransportInfoCS.preprocessing import PTICSSLUPreprocessing from alex.components.slu.base import CategoryLabelDatabase from alex.components.slu.dailrclassifier import DAILogRegClassifier from alex.corpustools.wavaskey import load_wavaskey, save_wavaskey from alex.corpustools.semscore import score cldb = CategoryLabelDatabase('../data/database.py') preprocessing = PTICSSLUPreprocessing(cldb) slu = DAILogRegClassifier(cldb, preprocessing) slu.load_model(fn_model) test_utterances = load_wavaskey(fn_input, constructor, limit=100000) parsed_das = {} for utt_key, utt in sorted(test_utterances.iteritems()): if isinstance(utt, Utterance): obs = {'utt': utt} elif isinstance(utt, UtteranceNBList): obs = {'utt_nbl': utt} else: raise BaseException('Unsupported observation type') print '-' * 120 print "Observation:" print utt_key, " ==> " print unicode(utt) da_confnet = slu.parse(obs, verbose=False) print "Conf net:" print unicode(da_confnet) da_confnet.prune() dah = da_confnet.get_best_da_hyp() print "1 best: " print unicode(dah) parsed_das[utt_key] = dah.da if 'CL_' in str(dah.da): print '*' * 120 print utt print dah.da slu.parse(obs, verbose=True) if 'trn' in fn_model: fn_sem = os.path.basename(fn_input)+'.model.trn.sem.out' elif 'asr' in fn_model: fn_sem = os.path.basename(fn_input)+'.model.asr.sem.out' elif 'nbl' in fn_model: fn_sem = os.path.basename(fn_input)+'.model.nbl.sem.out' else: fn_sem = os.path.basename(fn_input)+'.XXX.sem.out' save_wavaskey(fn_sem, parsed_das, trans = lambda da: '&'.join(sorted(unicode(da).split('&')))) f = codecs.open(os.path.basename(fn_sem)+'.score', 'w+', encoding='UTF-8') score(fn_reference, fn_sem, True, True, f) f.close()
cal_list = [x[1] for x in pri_cal_list] s = time.time() for wpm in wp_2_match: i, f = find_approx(wpm[0]) e = time.time() print "size {size} elapsed {time}".format(size=len(wp_2_match), time=e - s) print "=" * 120 print "The calibration table: insert it in the config" print "-" * 120 print repr(cal_list) if __name__ == '__main__': reference = 'decoded_kaldi/all_trn.txt' trn_dict = load_wavaskey(reference, Utterance) trn_dict = basename_dict(trn_dict) fst_dir = 'decoded_kaldi' fst_fns = sorted(glob.glob(os.path.join(fst_dir, '*.fst'))) words = load_words('models/words.txt') wp_2_match = [] for i, fn in enumerate(fst_fns): print '=' * 120 print i, fn ref = trn_dict[os.path.basename(fn).replace('fst', 'wav')] print unicode(ref) print '-' * 120
def hdc_slu_test(fn_input, constructor, fn_reference): """ Tests a SLU DAILogRegClassifier model. :param fn_model: :param fn_input: :param constructor: :param fn_reference: :return: """ print "="*120 print "Testing HDC SLU: ", fn_input, fn_reference print "-"*120 from alex.components.slu.base import CategoryLabelDatabase from alex.applications.PublicTransportInfoCS.preprocessing import PTICSSLUPreprocessing from alex.applications.PublicTransportInfoCS.hdc_slu import PTICSHDCSLU from alex.corpustools.wavaskey import load_wavaskey, save_wavaskey from alex.corpustools.semscore import score cldb = CategoryLabelDatabase('../data/database.py') preprocessing = PTICSSLUPreprocessing(cldb) hdc_slu = PTICSHDCSLU(preprocessing) test_utterances = load_wavaskey(fn_input, constructor, limit=100000) parsed_das = {} for utt_key, utt in sorted(test_utterances.iteritems()): if isinstance(utt, Utterance): obs = {'utt': utt} elif isinstance(utt, UtteranceNBList): obs = {'utt_nbl': utt} else: raise BaseException('Unsupported observation type') print '-' * 120 print "Observation:" print utt_key, " ==> " print unicode(utt) da_confnet = hdc_slu.parse(obs, verbose=False) print "Conf net:" print unicode(da_confnet) da_confnet.prune() dah = da_confnet.get_best_da_hyp() print "1 best: " print unicode(dah) parsed_das[utt_key] = dah.da if 'CL_' in str(dah.da): print '*' * 120 print utt print dah.da hdc_slu.parse(obs, verbose=True) fn_sem = os.path.basename(fn_input)+'.hdc.sem.out' save_wavaskey(fn_sem, parsed_das, trans = lambda da: '&'.join(sorted(unicode(da).split('&')))) f = codecs.open(os.path.basename(fn_sem)+'.score', 'w+', encoding='UTF-8') score(fn_reference, fn_sem, True, True, f) f.close()