def test_compile_features_for_word(self):
    """Exercise durmodel_utils.compile_features_for_word on a two-phone word.

    The word is described by two local ``(feature_set, duration)`` pairs
    (durations 5 and 6) and a context dict keyed by relative position
    (-2, -1, +1). The assertions below pin the expected output: each
    result entry is indexable as ``(feature_set, duration)``, and context
    features show up in the feature set with a ``pos<offset>:`` prefix.
    """
    context = {
        -2: {("foo", 1), ("bar", 1), ("dur", 0.377)},
        -1: {("fii", 1), ("dur", 0.553)},
        +1: {("</s>", 1)},
    }
    local_feature_and_dur_seq = [
        ({("bii", 1), ("boo", 1)}, 5),
        ({("buu", 1), ("bee", 1)}, 6),
    ]

    result = durmodel_utils.compile_features_for_word(
        context, local_feature_and_dur_seq)

    # First entry: own duration and local features are kept, and the
    # context features appear under their original offsets.
    self.assertEqual(result[0][1], 5)
    self.assertIn(("pos-2:foo", 1), result[0][0])
    self.assertIn(("bii", 1), result[0][0])
    self.assertIn(("pos-1:dur", 0.553), result[0][0])

    # Second entry: what was at offset -1 is now expected at offset -2.
    self.assertIn(("pos-2:fii", 1), result[1][0])

    # The end-of-sentence marker is expected at +2 for the second-to-last
    # entry and at +1 for the last one.
    self.assertIn(("pos+2:</s>", 1), result[-2][0])
    self.assertIn(("pos+1:</s>", 1), result[-1][0])

    # The last entry's "pos-1:dur" is expected to be the first local
    # duration (5) passed through durmodel_utils.dur_function.
    self.assertIn(
        ("pos-1:dur", durmodel_utils.dur_function(5)), result[-1][0])
def test_compile_features_for_word(self):
    """Exercise durmodel_utils.compile_features_for_word on a two-phone word.

    The input is a context dict keyed by relative position (-2, -1, +1)
    and a list of two local ``(feature_set, duration)`` pairs with
    durations 5 and 6. The assertions pin the expected output: each
    result entry is indexable as ``(feature_set, duration)``, and context
    features are expected in the feature set with a ``pos<offset>:``
    prefix.
    """
    context = {
        -2: {('foo', 1), ('bar', 1), ('dur', 0.377)},
        -1: {('fii', 1), ('dur', 0.553)},
        +1: {('</s>', 1)},
    }
    local_feature_and_dur_seq = [
        ({('bii', 1), ('boo', 1)}, 5),
        ({('buu', 1), ('bee', 1)}, 6),
    ]

    result = durmodel_utils.compile_features_for_word(
        context, local_feature_and_dur_seq)

    # First entry: own duration and local features are kept, and the
    # context features appear under their original offsets.
    self.assertEqual(result[0][1], 5)
    self.assertIn(('pos-2:foo', 1), result[0][0])
    self.assertIn(('bii', 1), result[0][0])
    self.assertIn(('pos-1:dur', 0.553), result[0][0])

    # Second entry: what was at offset -1 is now expected at offset -2.
    self.assertIn(('pos-2:fii', 1), result[1][0])

    # The end-of-sentence marker is expected at +2 for the second-to-last
    # entry and at +1 for the last one.
    self.assertIn(('pos+2:</s>', 1), result[-2][0])
    self.assertIn(('pos+1:</s>', 1), result[-1][0])

    # The last entry's 'pos-1:dur' is expected to be the first local
    # duration (5) passed through durmodel_utils.dur_function.
    self.assertIn(
        ('pos-1:dur', durmodel_utils.dur_function(5)), result[-1][0])
def test_compile_features_for_word(self):
    """Exercise durmodel_utils.compile_features_for_word on a two-phone word.

    Feeds a context dict keyed by relative position (-2, -1, +1) together
    with two local ``(feature_set, duration)`` pairs (durations 5 and 6),
    then checks the output: each result entry is indexable as
    ``(feature_set, duration)``, and context features are expected in the
    feature set with a ``pos<offset>:`` prefix.
    """
    context = {
        -2: {("foo", 1), ("bar", 1), ("dur", 0.377)},
        -1: {("fii", 1), ("dur", 0.553)},
        +1: {("</s>", 1)},
    }
    local_feature_and_dur_seq = [
        ({("bii", 1), ("boo", 1)}, 5),
        ({("buu", 1), ("bee", 1)}, 6),
    ]

    result = durmodel_utils.compile_features_for_word(
        context, local_feature_and_dur_seq)

    # First entry: own duration and local features are kept, and the
    # context features appear under their original offsets.
    self.assertEqual(result[0][1], 5)
    self.assertIn(("pos-2:foo", 1), result[0][0])
    self.assertIn(("bii", 1), result[0][0])
    self.assertIn(("pos-1:dur", 0.553), result[0][0])

    # Second entry: what was at offset -1 is now expected at offset -2.
    self.assertIn(("pos-2:fii", 1), result[1][0])

    # The end-of-sentence marker is expected at +2 for the second-to-last
    # entry and at +1 for the last one.
    self.assertIn(("pos+2:</s>", 1), result[-2][0])
    self.assertIn(("pos+1:</s>", 1), result[-1][0])

    # The last entry's "pos-1:dur" is expected to be the first local
    # duration (5) passed through durmodel_utils.dur_function.
    self.assertIn(
        ("pos-1:dur", durmodel_utils.dur_function(5)), result[-1][0])
contexts = durmodel_utils.get_context_features_and_durs( lat, features_and_durs) assert len(contexts) == len(features_and_durs) assert len(contexts) == len(lat.arcs) i = 0 for (context, local_feature_and_dur_seq) in zip(contexts, features_and_durs): #print >> sys.stderr, word_list[lat.arcs[i].word_id] if args.skip_fillers: if word_list[lat.arcs[i].word_id] in filler_words: i += 1 continue #print >> sys.stderr, "Processing word %s" % word_list[lat.arcs[i].word_id] full_word_features = durmodel_utils.compile_features_for_word( context, local_feature_and_dur_seq) num_phones = len(full_word_features) #print >> sys.stderr, lat.arcs[i].start_frame, lat.arcs[i].end_frame, word_list[lat.arcs[i].word_id].encode('utf-8') context_matrix = np.zeros((num_phones, len(feature_dict)), dtype=theano.config.floatX) if utt2spkid: speaker_vector = np.ones( (num_phones, 1), dtype=np.int) * utt2spkid[lat.name] else: speaker_vector = np.zeros((num_phones, 1), dtype=np.int) y = np.zeros((num_phones, 1), dtype=theano.config.floatX) for (j, (phone_features, dur)) in enumerate(full_word_features): #print >> sys.stderr, " phone %d" % (j)
features_and_dur_seq = durmodel_utils.make_local(arc.start_frame, arc.word_id, arc.phone_ids, transitions, word_list, nonsilence_phonemes, language=args.language, stress_dict=stress_dict) features_and_durs.append(features_and_dur_seq) contexts = durmodel_utils.get_context_features_and_durs(lat, features_and_durs) assert len(contexts) == len(features_and_durs) assert len(contexts) == len(lat.arcs) i = 0 for (context, local_feature_and_dur_seq) in zip(contexts, features_and_durs): #print >> sys.stderr, word_list[lat.arcs[i].word_id] if args.skip_fillers: if word_list[lat.arcs[i].word_id] in filler_words: i += 1 continue #print >> sys.stderr, "Processing word %s" % word_list[lat.arcs[i].word_id] full_word_features = durmodel_utils.compile_features_for_word(context, local_feature_and_dur_seq) num_phones = len(full_word_features) #print >> sys.stderr, lat.arcs[i].start_frame, lat.arcs[i].end_frame, word_list[lat.arcs[i].word_id].encode('utf-8') context_matrix = np.zeros((num_phones, len(feature_dict)), dtype=theano.config.floatX) if utt2spkid: speaker_vector = np.ones((num_phones, 1), dtype=np.int) * utt2spkid[lat.name] else: speaker_vector = np.zeros((num_phones, 1), dtype=np.int) y = np.zeros((num_phones, 1), dtype=theano.config.floatX) for (j, (phone_features, dur)) in enumerate(full_word_features): #print >> sys.stderr, " phone %d" % (j) for (feature_name, value) in phone_features: feature_id = feature_dict.get(feature_name, -1) if feature_id >= 0: