def train(self, corpus, text_corpus):
    '''
    Inherited entry point for processor training.

    Builds Utterance objects from the XML filenames in *corpus*, keeps only
    those utterances this processor is configured to train on (i.e. which
    have the attribute named by self.train_on_utts_which_have) and which have
    not been flagged as bad (status attribute must be 'OK'), then hands the
    surviving utterances to the processor-specific do_training, which
    subclasses override.
    '''
    loaded = [Utterance(fname) for fname in corpus]
    # Keep only trainable, non-bad utterances (single pass over the list).
    usable = [
        utt for utt in loaded
        if utt.get(self.train_on_utts_which_have) and utt.get('status') == 'OK'
    ]
    self.do_training(usable, text_corpus)
# NOTE(review): this whole region is dead debug/demo code.  The pylab calls
# were already commented out in the original (the collapsed source makes it
# ambiguous whether pylab.show() was live — assumed commented here; confirm
# against version control), and the demo below is guarded by `if False:` so it
# never executes.  The prints have been converted to single-argument function
# form, which behaves identically on Python 2 and is required syntax on
# Python 3 — the Python-2-only `print x` statements previously made this
# module un-importable under Python 3.
# pylab.subplot('515')
#
# pylab.plot(recon)
# pylab.show()

### utt audio access:
if False:
    import numpy
    # Hard-coded path to a sample utterance from the Blizzard 2016 setup —
    # only meaningful on the original AFS filesystem.
    utt = Utterance('/afs/inf.ed.ac.uk/group/cstr/projects/blizzard_entries/blizzard2016/tool/Ossian/train/en/speakers/fls_2016_segmented_TOY/english_blizz16_02_prom_annotation/utt/AMidsummerNightsDream_000.utt')
    print(utt)
    utt.pretty_print()
    print([utt.get('acoustic_stream_names')])
    print('-----')
    # Demonstrate per-word acoustic feature access on each word token.
    for (word_i, word) in enumerate(utt.xpath('//token[@token_class="word"]')):
        print(word.get('norm_text'))
        d = utt.get_acoustic_features(word, 'lf0')
        i = utt.get_acoustic_features(word, 'lf0', interpolate_fzero=True)
        m = utt.get_acoustic_statistics(word, 'mgc', dim=0)
        s = utt.get_acoustic_statistics(word, 'mgc')
        l = utt.get_acoustic_statistics(utt, 'lf0', interpolate_fzero=True)
        print(d)
def __call__(self, utterance_file, utterance_location, mode):
    '''
    Make the processor callable on a single utterance file.

    Loads an Utterance from utterance_file (resolving data relative to
    utterance_location), applies this processor to it in the given voice
    mode, and saves the result.  Returns whatever apply_to_utt returns.
    '''
    utt = Utterance(utterance_file, utterance_location=utterance_location)
    result = self.apply_to_utt(utt, voice_mode=mode, save=True)
    return result