def __init__(self, cfg, commands, audio_in, asr_hypotheses_out, close_event):
    """
    Initialises an ASR object according to the configuration (cfg['ASR']
    is the relevant section), and stores pipe ends to other processes.

    Arguments:
        cfg: a Config object specifying the configuration to use
        commands: our end of a pipe (multiprocessing.Pipe) for receiving commands
        audio_in: our end of a pipe (multiprocessing.Pipe) for receiving audio
            frames (from VAD)
        asr_hypotheses_out: our end of a pipe (multiprocessing.Pipe) for sending
            ASR hypotheses
        close_event: an event (multiprocessing.Event) signalling that the
            process should shut down
    """
    multiprocessing.Process.__init__(self)

    self.cfg = cfg
    self.commands = commands
    self.local_commands = deque()
    self.audio_in = audio_in
    self.local_audio_in = deque()
    self.asr_hypotheses_out = asr_hypotheses_out
    self.close_event = close_event

    # Load the ASR component specified in the configuration.
    self.asr = asr_factory(cfg)

    self.system_logger = self.cfg['Logging']['system_logger']
    self.session_logger = self.cfg['Logging']['session_logger']

    self.recognition_on = False
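# A minimal sketch of wiring this process up, assuming the enclosing class is
# named ASR and that cfg was loaded elsewhere (e.g. via Config.load_configs);
# neither is shown in the snippet above.
import multiprocessing

parent_commands, child_commands = multiprocessing.Pipe()        # control commands
vad_audio_out, asr_audio_in = multiprocessing.Pipe()            # audio frames from VAD
asr_hypotheses_out, hub_hypotheses_in = multiprocessing.Pipe()  # hypotheses back out
close_event = multiprocessing.Event()

asr_proc = ASR(cfg, child_commands, asr_audio_in, asr_hypotheses_out, close_event)
asr_proc.start()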
def decode_with_reference(reference, outdir, cfg):
    """ Launch the decoding

    Args:
        reference(str): Path to file with references in Alex reference format.
        outdir(str): Path to directory where to save log files.
        cfg(dict): Alex configuration
    """
    asr = asr_factory(cfg)
    trn_dict = load_wavaskey(reference, Utterance)

    declen_dict, fwlen_dict, wavlen_dict, dec_dict = {}, {}, {}, {}
    for wav_path, reference in trn_dict.iteritems():
        best, dec_dur, fw_dur, wav_dur = decode_info(asr, cfg, wav_path, reference)
        dec_dict[wav_path] = best
        wavlen_dict[wav_path] = wav_dur
        declen_dict[wav_path] = dec_dur
        fwlen_dict[wav_path] = fw_dur

    compute_rt_factor(outdir, trn_dict, dec_dict, wavlen_dict, declen_dict, fwlen_dict)
    compute_save_stat(outdir, trn_dict, dec_dict, wavlen_dict, declen_dict, fwlen_dict)
def decode_with_reference(reference, outdir, cfg):
    """ Launch the decoding

    Args:
        reference(str): Path to file with references in Alex reference format.
        outdir(str): Path to directory where to save log files.
        cfg(dict): Alex configuration
    """
    asr = asr_factory(cfg)
    trn_dict = load_wavaskey(reference, Utterance)

    declen_dict, fwlen_dict, wavlen_dict, dec_dict = {}, {}, {}, {}
    for wav_path, reference in sorted(trn_dict.items()):
        best, dec_dur, fw_dur, wav_dur = decode_info(asr, cfg, outdir, wav_path, reference)
        dec_dict[wav_path] = best
        wavlen_dict[wav_path] = wav_dur
        declen_dict[wav_path] = dec_dur
        fwlen_dict[wav_path] = fw_dur

    compute_rt_factor(outdir, trn_dict, dec_dict, wavlen_dict, declen_dict, fwlen_dict)
    compute_save_stat(outdir, trn_dict, dec_dict, wavlen_dict, declen_dict, fwlen_dict)
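# A minimal usage sketch for decode_with_reference; the config file name and
# the reference-file and output paths below are illustrative assumptions,
# not taken from the example above.
from alex.utils.config import Config

cfg = Config.load_configs(['kaldi.cfg'], use_default=True)
decode_with_reference('reference.trn', 'decoding_logs', cfg)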
preprocessing = PTICSSLUPreprocessing(cldb)
slu = PTICSHDCSLU(
    preprocessing,
    cfg={
        'SLU': {
            PTICSHDCSLU: {
                'utt2da': as_project_path(
                    "applications/PublicTransportInfoCS/data/utt2da_dict.txt")
            }
        }
    })
cfg = Config.load_configs([
    '../kaldi.cfg',
], use_default=True)
asr_rec = asr_factory(cfg)


def normalise_semi_words(txt):
    # Normalise these semi-words: map the special ASR tokens and empty
    # output onto the single '_other_' label.
    if txt == '__other__':
        txt = '_other_'
    elif txt == '__silence__':
        txt = '_other_'
    elif not txt:
        txt = '_other_'
    return txt


def process_call_log(fn):
def extract_from_xml(indomain_data_dir, outdir, cfg):
    glob = 'asr_transcribed.xml'
    asr = asr_factory(cfg)

    print 'Collecting files under %s with glob %s' % (indomain_data_dir, glob)
    files = []
    for root, dirnames, filenames in os.walk(indomain_data_dir, followlinks=True):
        for filename in fnmatch.filter(filenames, glob):
            files.append(os.path.join(root, filename))

    # DEBUG example
    # files = [
    #     '/ha/projects/vystadial/data/call-logs/2013-05-30-alex-aotb-prototype/part1/2013-06-27-09-33-25.116055-CEST-00420221914256/asr_transcribed.xml']

    try:
        trn, dec, dec_len, wav_len = [], [], [], []
        for fn in files:
            doc = xml.dom.minidom.parse(fn)
            turns = doc.getElementsByTagName("turn")
            f_dir = os.path.dirname(fn)

            for turn in turns:
                if turn.getAttribute('speaker') != 'user':
                    continue

                recs = turn.getElementsByTagName("rec")
                trans = turn.getElementsByTagName("asr_transcription")

                if len(recs) != 1:
                    print "Skipping a turn {turn} in file: {fn} - recs: {recs}".format(
                        turn=turn.getAttribute('turn_number'), fn=fn, recs=len(recs))
                    continue
                if len(trans) == 0:
                    print "Skipping a turn in {fn} - trans: {trans}".format(
                        fn=fn, trans=len(trans))
                    continue

                wav_file = recs[0].getAttribute('fname')
                # FIXME: Check whether the last transcription is really the best! FJ
                t = various.get_text_from_xml_node(trans[-1])
                t = normalise_text(t)
                if exclude_lm(t):
                    continue

                # TODO is it still valid? OP
                # The silence does not have a label in the language model.
                t = t.replace('_SIL_', '')
                trn.append((wav_file, t))

                wav_path = os.path.join(f_dir, wav_file)
                best, dec_dur, fw_dur, wav_dur = decode_info(asr, cfg, wav_path, t)
                dec.append((wav_file, best))
                wav_len.append((wav_file, wav_dur))
                dec_len.append((wav_file, dec_dur))
    except Exception as e:
        print 'PARTIAL RESULTS were saved to %s' % outdir
        print e
        raise e
    finally:
        trn_dict = dict(trn)
        dec_dict = dict(dec)
        wavlen_dict = dict(wav_len)
        declen_dict = dict(dec_len)
        compute_save_stat(outdir, trn_dict, dec_dict, wavlen_dict, declen_dict)
def main():
    cldb = CategoryLabelDatabase('../data/database.py')
    preprocessing = PTIENSLUPreprocessing(cldb)
    slu = PTIENHDCSLU(
        preprocessing,
        cfg={'SLU': {PTIENHDCSLU: {
            'utt2da': as_project_path("applications/PublicTransportInfoEN/data/utt2da_dict.txt")}}})
    cfg = Config.load_configs(['../kaldi.cfg', ], use_default=True)
    asr_rec = asr_factory(cfg)

    fn_uniq_trn = 'uniq.trn'
    fn_uniq_trn_hdc_sem = 'uniq.trn.hdc.sem'
    fn_uniq_trn_sem = 'uniq.trn.sem'

    fn_all_sem = 'all.sem'
    fn_all_trn = 'all.trn'
    fn_all_trn_hdc_sem = 'all.trn.hdc.sem'
    fn_all_asr = 'all.asr'
    fn_all_asr_hdc_sem = 'all.asr.hdc.sem'
    fn_all_nbl = 'all.nbl'
    fn_all_nbl_hdc_sem = 'all.nbl.hdc.sem'

    fn_train_sem = 'train.sem'
    fn_train_trn = 'train.trn'
    fn_train_trn_hdc_sem = 'train.trn.hdc.sem'
    fn_train_asr = 'train.asr'
    fn_train_asr_hdc_sem = 'train.asr.hdc.sem'
    fn_train_nbl = 'train.nbl'
    fn_train_nbl_hdc_sem = 'train.nbl.hdc.sem'

    fn_dev_sem = 'dev.sem'
    fn_dev_trn = 'dev.trn'
    fn_dev_trn_hdc_sem = 'dev.trn.hdc.sem'
    fn_dev_asr = 'dev.asr'
    fn_dev_asr_hdc_sem = 'dev.asr.hdc.sem'
    fn_dev_nbl = 'dev.nbl'
    fn_dev_nbl_hdc_sem = 'dev.nbl.hdc.sem'

    fn_test_sem = 'test.sem'
    fn_test_trn = 'test.trn'
    fn_test_trn_hdc_sem = 'test.trn.hdc.sem'
    fn_test_asr = 'test.asr'
    fn_test_asr_hdc_sem = 'test.asr.hdc.sem'
    fn_test_nbl = 'test.nbl'
    fn_test_nbl_hdc_sem = 'test.nbl.hdc.sem'

    indomain_data_dir = "indomain_data"

    print "Generating the SLU train and test data"
    print "-" * 120
    ###############################################################################################

    files = []
    files.append(glob.glob(os.path.join(indomain_data_dir, 'asr_transcribed.xml')))
    files.append(glob.glob(os.path.join(indomain_data_dir, '*', 'asr_transcribed.xml')))
    files.append(glob.glob(os.path.join(indomain_data_dir, '*', '*', 'asr_transcribed.xml')))
    files.append(glob.glob(os.path.join(indomain_data_dir, '*', '*', '*', 'asr_transcribed.xml')))
    files.append(glob.glob(os.path.join(indomain_data_dir, '*', '*', '*', '*', 'asr_transcribed.xml')))
    files.append(glob.glob(os.path.join(indomain_data_dir, '*', '*', '*', '*', '*', 'asr_transcribed.xml')))
    files = various.flatten(files)

    sem = []
    trn = []
    trn_hdc_sem = []
    asr = []
    asr_hdc_sem = []
    nbl = []
    nbl_hdc_sem = []

    for fn in files[:100000]:
        f_dir = os.path.dirname(fn)

        print "Processing:", fn
        doc = xml.dom.minidom.parse(fn)
        turns = doc.getElementsByTagName("turn")

        for i, turn in enumerate(turns):
            if turn.getAttribute('speaker') != 'user':
                continue

            recs = turn.getElementsByTagName("rec")
            trans = turn.getElementsByTagName("asr_transcription")
            asrs = turn.getElementsByTagName("asr")

            if len(recs) != 1:
                print "Skipping a turn {turn} in file: {fn} - recs: {recs}".format(
                    turn=i, fn=fn, recs=len(recs))
                continue

            if len(asrs) == 0 and (i + 1) < len(turns):
                next_asrs = turns[i + 1].getElementsByTagName("asr")
                if len(next_asrs) != 2:
                    print "Skipping a turn {turn} in file: {fn} - asrs: {asrs} - next_asrs: {next_asrs}".format(
                        turn=i, fn=fn, asrs=len(asrs), next_asrs=len(next_asrs))
                    continue
                print "Recovered from missing ASR output by using a delayed ASR output from the following turn of turn {turn}. File: {fn} - next_asrs: {asrs}".format(
                    turn=i, fn=fn, asrs=len(next_asrs))
                hyps = next_asrs[0].getElementsByTagName("hypothesis")
            elif len(asrs) == 1:
                hyps = asrs[0].getElementsByTagName("hypothesis")
            elif len(asrs) == 2:
                print "Recovered from EXTRA ASR outputs by using the last ASR output from the turn. File: {fn} - asrs: {asrs}".format(
                    fn=fn, asrs=len(asrs))
                hyps = asrs[-1].getElementsByTagName("hypothesis")
            else:
                print "Skipping a turn {turn} in file {fn} - asrs: {asrs}".format(
                    turn=i, fn=fn, asrs=len(asrs))
                continue

            if len(trans) == 0:
                print "Skipping a turn in {fn} - trans: {trans}".format(
                    fn=fn, trans=len(trans))
                continue

            wav_key = recs[0].getAttribute('fname')
            wav_path = os.path.join(f_dir, wav_key)

            # FIXME: Check whether the last transcription is really the best! FJ
            t = various.get_text_from_xml_node(trans[-1])
            t = normalise_text(t)

            if '--asr-log' not in sys.argv:
                asr_rec_nbl = asr_rec.rec_wav_file(wav_path)
                a = unicode(asr_rec_nbl.get_best())
            else:
                a = various.get_text_from_xml_node(hyps[0])
                a = normalise_semi_words(a)

            if exclude_slu(t) or 'DOM Element:' in a:
                print "Skipping transcription:", unicode(t)
                print "Skipping ASR output:   ", unicode(a)
                continue

            # The silence does not have a label in the language model.
            t = t.replace('_SIL_', '')
            trn.append((wav_key, t))

            print "Parsing transcription:", unicode(t)
            print "                  ASR:", unicode(a)

            # HDC SLU on the transcription
            s = slu.parse_1_best({'utt': Utterance(t)}).get_best_da()
            trn_hdc_sem.append((wav_key, s))

            if '--uniq' not in sys.argv:
                # HDC SLU on the 1-best ASR hypothesis
                if '--asr-log' not in sys.argv:
                    a = unicode(asr_rec_nbl.get_best())
                else:
                    a = various.get_text_from_xml_node(hyps[0])
                    a = normalise_semi_words(a)

                asr.append((wav_key, a))

                s = slu.parse_1_best({'utt': Utterance(a)}).get_best_da()
                asr_hdc_sem.append((wav_key, s))

                # HDC SLU on the ASR n-best list
                n = UtteranceNBList()
                if '--asr-log' not in sys.argv:
                    n = asr_rec_nbl
                    print 'ASR RECOGNITION NBLIST\n', unicode(n)
                else:
                    for h in hyps:
                        txt = various.get_text_from_xml_node(h)
                        txt = normalise_semi_words(txt)
                        n.add(abs(float(h.getAttribute('p'))), Utterance(txt))

                n.merge()
                n.normalise()

                nbl.append((wav_key, n.serialise()))

                if '--fast' not in sys.argv:
                    s = slu.parse_nblist({'utt_nbl': n}).get_best_da()
                    nbl_hdc_sem.append((wav_key, s))

            # There is no manual semantics in the transcriptions yet.
            sem.append((wav_key, None))

    uniq_trn = {}
    uniq_trn_hdc_sem = {}
    uniq_trn_sem = {}
    trn_set = set()

    sem = dict(trn_hdc_sem)
    for k, v in trn:
        if not v in trn_set:
            trn_set.add(v)
            uniq_trn[k] = v
            uniq_trn_hdc_sem[k] = sem[k]
            uniq_trn_sem[k] = v + " <=> " + unicode(sem[k])

    save_wavaskey(fn_uniq_trn, uniq_trn)
    save_wavaskey(fn_uniq_trn_hdc_sem, uniq_trn_hdc_sem,
                  trans=lambda da: '&'.join(sorted(unicode(da).split('&'))))
    save_wavaskey(fn_uniq_trn_sem, uniq_trn_sem)

    # all
    save_wavaskey(fn_all_trn, dict(trn))
    save_wavaskey(fn_all_trn_hdc_sem, dict(trn_hdc_sem),
                  trans=lambda da: '&'.join(sorted(unicode(da).split('&'))))

    if '--uniq' not in sys.argv:
        save_wavaskey(fn_all_asr, dict(asr))
        save_wavaskey(fn_all_asr_hdc_sem, dict(asr_hdc_sem),
                      trans=lambda da: '&'.join(sorted(unicode(da).split('&'))))
        save_wavaskey(fn_all_nbl, dict(nbl))
        save_wavaskey(fn_all_nbl_hdc_sem, dict(nbl_hdc_sem),
                      trans=lambda da: '&'.join(sorted(unicode(da).split('&'))))

    # Shuffle all parallel lists with the same seed so the splits stay aligned.
    seed_value = 10
    random.seed(seed_value)
    random.shuffle(trn)
    random.seed(seed_value)
    random.shuffle(trn_hdc_sem)
    random.seed(seed_value)
    random.shuffle(asr)
    random.seed(seed_value)
    random.shuffle(asr_hdc_sem)
    random.seed(seed_value)
    random.shuffle(nbl)
    random.seed(seed_value)
    random.shuffle(nbl_hdc_sem)

    # trn
    train_trn = trn[:int(0.8 * len(trn))]
    dev_trn = trn[int(0.8 * len(trn)):int(0.9 * len(trn))]
    test_trn = trn[int(0.9 * len(trn)):]

    save_wavaskey(fn_train_trn, dict(train_trn))
    save_wavaskey(fn_dev_trn, dict(dev_trn))
    save_wavaskey(fn_test_trn, dict(test_trn))

    # trn_hdc_sem
    train_trn_hdc_sem = trn_hdc_sem[:int(0.8 * len(trn_hdc_sem))]
    dev_trn_hdc_sem = trn_hdc_sem[int(0.8 * len(trn_hdc_sem)):int(0.9 * len(trn_hdc_sem))]
    test_trn_hdc_sem = trn_hdc_sem[int(0.9 * len(trn_hdc_sem)):]

    save_wavaskey(fn_train_trn_hdc_sem, dict(train_trn_hdc_sem),
                  trans=lambda da: '&'.join(sorted(unicode(da).split('&'))))
    save_wavaskey(fn_dev_trn_hdc_sem, dict(dev_trn_hdc_sem),
                  trans=lambda da: '&'.join(sorted(unicode(da).split('&'))))
    save_wavaskey(fn_test_trn_hdc_sem, dict(test_trn_hdc_sem),
                  trans=lambda da: '&'.join(sorted(unicode(da).split('&'))))

    # asr
    train_asr = asr[:int(0.8 * len(asr))]
    dev_asr = asr[int(0.8 * len(asr)):int(0.9 * len(asr))]
    test_asr = asr[int(0.9 * len(asr)):]

    save_wavaskey(fn_train_asr, dict(train_asr))
    save_wavaskey(fn_dev_asr, dict(dev_asr))
    save_wavaskey(fn_test_asr, dict(test_asr))

    # asr_hdc_sem
    train_asr_hdc_sem = asr_hdc_sem[:int(0.8 * len(asr_hdc_sem))]
    dev_asr_hdc_sem = asr_hdc_sem[int(0.8 * len(asr_hdc_sem)):int(0.9 * len(asr_hdc_sem))]
    test_asr_hdc_sem = asr_hdc_sem[int(0.9 * len(asr_hdc_sem)):]

    save_wavaskey(fn_train_asr_hdc_sem, dict(train_asr_hdc_sem),
                  trans=lambda da: '&'.join(sorted(unicode(da).split('&'))))
    save_wavaskey(fn_dev_asr_hdc_sem, dict(dev_asr_hdc_sem),
                  trans=lambda da: '&'.join(sorted(unicode(da).split('&'))))
    save_wavaskey(fn_test_asr_hdc_sem, dict(test_asr_hdc_sem),
                  trans=lambda da: '&'.join(sorted(unicode(da).split('&'))))

    # n-best lists
    train_nbl = nbl[:int(0.8 * len(nbl))]
    dev_nbl = nbl[int(0.8 * len(nbl)):int(0.9 * len(nbl))]
    test_nbl = nbl[int(0.9 * len(nbl)):]

    save_wavaskey(fn_train_nbl, dict(train_nbl))
    save_wavaskey(fn_dev_nbl, dict(dev_nbl))
    save_wavaskey(fn_test_nbl, dict(test_nbl))

    # nbl_hdc_sem
    train_nbl_hdc_sem = nbl_hdc_sem[:int(0.8 * len(nbl_hdc_sem))]
    dev_nbl_hdc_sem = nbl_hdc_sem[int(0.8 * len(nbl_hdc_sem)):int(0.9 * len(nbl_hdc_sem))]
    test_nbl_hdc_sem = nbl_hdc_sem[int(0.9 * len(nbl_hdc_sem)):]

    save_wavaskey(fn_train_nbl_hdc_sem, dict(train_nbl_hdc_sem),
                  trans=lambda da: '&'.join(sorted(unicode(da).split('&'))))
    save_wavaskey(fn_dev_nbl_hdc_sem, dict(dev_nbl_hdc_sem),
                  trans=lambda da: '&'.join(sorted(unicode(da).split('&'))))
    save_wavaskey(fn_test_nbl_hdc_sem, dict(test_nbl_hdc_sem),
                  trans=lambda da: '&'.join(sorted(unicode(da).split('&'))))
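# The repeated random.seed(seed_value) calls above are what keep the parallel
# lists (trn, trn_hdc_sem, asr, ...) aligned: re-seeding before each shuffle
# replays the identical permutation on every same-length list. A self-contained
# demonstration of the trick (all names below are local to this sketch):
import random

pairs = [('a.wav', 1), ('b.wav', 2), ('c.wav', 3), ('d.wav', 4)]
keys = [k for k, v in pairs]
vals = [v for k, v in pairs]

seed_value = 10
random.seed(seed_value)
random.shuffle(keys)
random.seed(seed_value)   # same seed => same permutation
random.shuffle(vals)

assert set(zip(keys, vals)) == set(pairs)   # the key/value pairing survives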
    help='Directory which should contain symlinks or directories with transcribed ASR')
parser_b = subparsers.add_parser(
    'load',
    help='Load wav transcriptions and reference with full paths to wavs')
parser_b.add_argument(
    'reference',
    help='Key value file: Keys contain paths to wav files. Values are reference transcriptions.')

args = parser.parse_args()

if os.path.exists(args.out_dir):
    if not args.f:
        print "\nThe directory '%s' already exists!\n" % args.out_dir
        parser.print_usage()
        parser.exit()
else:
    # create the dir
    try:
        os.makedirs(args.out_dir)
    except OSError as exc:
        # Tolerate only a concurrent mkdir of the same directory.
        if exc.errno != errno.EEXIST or not os.path.isdir(args.out_dir):
            raise exc

cfg = Config.load_configs(args.configs, use_default=True)
asr = asr_factory(cfg)

if args.command == 'extract':
    extract_from_xml(args.indomain_data_dir, args.out_dir, cfg)
elif args.command == 'load':
    # decode_with_reference expects the configuration as its third argument
    # (see its signature above).
    decode_with_reference(args.reference, args.out_dir, cfg)
else:
    raise Exception('Argparse mechanism failed: Should never happen')
def extract_from_xml(indomain_data_dir, outdir, cfg):
    """Extract transcriptions and wav file names from transcribed XML logs.

    Args:
        indomain_data_dir(path): path where the xml logs are stored
        outdir: directory where to save the (wav file name, reference) and
            (wav file name, decoded hypothesis) statistics
        cfg: Alex configuration
    """
    glob = 'asr_transcribed.xml'
    asr = asr_factory(cfg)

    print 'Collecting files under %s with glob %s' % (indomain_data_dir, glob)
    files = []
    for root, dirnames, filenames in os.walk(indomain_data_dir, followlinks=True):
        for filename in fnmatch.filter(filenames, glob):
            files.append(os.path.join(root, filename))

    # DEBUG example
    # files = [
    #     '/ha/projects/vystadial/data/call-logs/2013-05-30-alex-aotb-prototype/part1/2013-06-27-09-33-25.116055-CEST-00420221914256/asr_transcribed.xml']

    try:
        trn, dec, dec_len, wav_len = [], [], [], []
        for fn in files:
            doc = xml.dom.minidom.parse(fn)
            turns = doc.getElementsByTagName("turn")
            f_dir = os.path.dirname(fn)

            for turn in turns:
                if turn.getAttribute('speaker') != 'user':
                    continue

                recs = turn.getElementsByTagName("rec")
                trans = turn.getElementsByTagName("asr_transcription")

                if len(recs) != 1:
                    print "Skipping a turn {turn} in file: {fn} - recs: {recs}".format(
                        turn=turn.getAttribute('turn_number'), fn=fn, recs=len(recs))
                    continue
                if len(trans) == 0:
                    print "Skipping a turn in {fn} - trans: {trans}".format(
                        fn=fn, trans=len(trans))
                    continue

                wav_file = recs[0].getAttribute('fname')
                # FIXME: Check whether the last transcription is really the best! FJ
                t = various.get_text_from_xml_node(trans[-1])
                t = normalise_text(t)
                if exclude_lm(t):
                    continue

                # TODO is it still valid? OP
                # The silence does not have a label in the language model.
                t = t.replace('_SIL_', '')
                trn.append((wav_file, t))

                wav_path = os.path.join(f_dir, wav_file)
                best, dec_dur, fw_dur, wav_dur = decode_info(
                    asr, cfg, outdir, wav_path, t)
                dec.append((wav_file, best))
                wav_len.append((wav_file, wav_dur))
                dec_len.append((wav_file, dec_dur))
    except Exception as e:
        print 'PARTIAL RESULTS were saved to %s' % outdir
        print e
        raise e
    finally:
        trn_dict = dict(trn)
        dec_dict = dict(dec)
        wavlen_dict = dict(wav_len)
        declen_dict = dict(dec_len)
        compute_save_stat(outdir, trn_dict, dec_dict, wavlen_dict, declen_dict)
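# compute_rt_factor itself is not shown in these examples; presumably it
# reports the real-time factor, i.e. decoding time divided by audio duration.
# The helper below is a hypothetical sketch of that computation over the
# dictionaries collected above, not the actual implementation.
def report_rt_factor(wavlen_dict, declen_dict):
    for wav_key in sorted(wavlen_dict):
        wav_dur = wavlen_dict[wav_key]
        dec_dur = declen_dict[wav_key]
        if wav_dur > 0:
            # RT factor < 1.0 means decoding runs faster than real time.
            print '%s RT factor: %.2f' % (wav_key, dec_dur / wav_dur)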
""" The script has commands: --asr-log it uses the asr hypotheses from call logs """ asr_log = 0 num_workers = 1 cldb = CategoryLabelDatabase('../data/database.py') preprocessing = PTICSSLUPreprocessing(cldb) slu = PTICSHDCSLU(preprocessing, cfg = {'SLU': {PTICSHDCSLU: {'utt2da': as_project_path("applications/PublicTransportInfoCS/data/utt2da_dict.txt")}}}) cfg = Config.load_configs(['../kaldi.cfg',], use_default=True) asr_rec = asr_factory(cfg) def normalise_semi_words(txt): # normalise these semi-words if txt == '__other__': txt = '_other_' elif txt == '__silence__': txt = '_other_' elif not txt: txt = '_other_' return txt def process_call_log(fn): name = multiprocessing.current_process().name asr = []