def test_compile_features_for_word(self):
        """Check how compile_features_for_word merges context and local features.

        The input is a word-level context (feature sets keyed by relative
        position -2, -1, +1) plus a sequence of (local feature set, duration)
        pairs.  The test expects each result item to be a
        (feature set, duration) pair in which:

        * local features appear unchanged and the duration is carried over;
        * context features are renamed to ``pos<offset>:<name>``;
        * offsets are relative to the item, so for the second item the
          original -1 context shows up as ``pos-2`` and the +1 context as
          ``pos+1``, while for the first item the +1 context is ``pos+2``;
        * the last item sees the previous item's duration, passed through
          ``durmodel_utils.dur_function``, as its ``pos-1:dur`` feature.
        """
        context = {
            -2: {("foo", 1), ("bar", 1), ("dur", 0.377)},
            -1: {("fii", 1), ("dur", 0.553)},
            +1: {("</s>", 1)},
        }
        local_feature_and_dur_seq = [
            ({("bii", 1), ("boo", 1)}, 5),
            ({("buu", 1), ("bee", 1)}, 6),
        ]
        result = durmodel_utils.compile_features_for_word(context, local_feature_and_dur_seq)

        # assertIn replaces the deprecated assert_ alias (removed in
        # Python 3.12) and reports the container contents on failure.
        self.assertEqual(result[0][1], 5)
        self.assertIn(("pos-2:foo", 1), result[0][0])
        self.assertIn(("bii", 1), result[0][0])
        self.assertIn(("pos-1:dur", 0.553), result[0][0])
        self.assertIn(("pos-2:fii", 1), result[1][0])
        self.assertIn(("pos+2:</s>", 1), result[-2][0])
        self.assertIn(("pos+1:</s>", 1), result[-1][0])

        self.assertIn(("pos-1:dur", durmodel_utils.dur_function(5)), result[-1][0])
Ejemplo n.º 2
0
    def test_compile_features_for_word(self):
        """Verify the per-item feature sets built by compile_features_for_word.

        ``context`` maps relative word positions (-2, -1, +1) to feature
        sets; ``local_feature_and_dur_seq`` is a list of
        (local feature set, duration) pairs.  The expected result is one
        (feature set, duration) pair per local item, where context features
        are prefixed with ``pos<offset>:`` and the offsets shift as the item
        index advances (the -1 context is ``pos-1`` for the first item and
        ``pos-2`` for the second; the +1 context is ``pos+2`` for the first
        item and ``pos+1`` for the last).  The last item must also carry the
        preceding item's duration, mapped through
        ``durmodel_utils.dur_function``, as ``pos-1:dur``.
        """
        context = {
            -2: {('foo', 1), ('bar', 1), ('dur', 0.377)},
            -1: {('fii', 1), ('dur', 0.553)},
            +1: {('</s>', 1)},
        }
        local_feature_and_dur_seq = [
            ({('bii', 1), ('boo', 1)}, 5),
            ({('buu', 1), ('bee', 1)}, 6),
        ]
        result = durmodel_utils.compile_features_for_word(
            context, local_feature_and_dur_seq)

        # The duration of the first local item is passed through unchanged.
        self.assertEqual(result[0][1], 5)

        # assertIn is used instead of the deprecated assert_ alias (removed
        # in Python 3.12); it also prints the searched set on failure.
        self.assertIn(('pos-2:foo', 1), result[0][0])
        self.assertIn(('bii', 1), result[0][0])
        self.assertIn(('pos-1:dur', 0.553), result[0][0])
        self.assertIn(('pos-2:fii', 1), result[1][0])
        self.assertIn(('pos+2:</s>', 1), result[-2][0])
        self.assertIn(('pos+1:</s>', 1), result[-1][0])

        self.assertIn(('pos-1:dur', durmodel_utils.dur_function(5)),
                      result[-1][0])
Ejemplo n.º 3
0
    def test_compile_features_for_word(self):
        """Exercise compile_features_for_word on a two-item word.

        Inputs: a context dict keyed by relative position (-2, -1, +1) whose
        values are sets of (name, value) features, and a list of two
        (local feature set, duration) pairs.

        Expectations on the returned sequence of (feature set, duration):
        the first duration is 5; local features are kept as-is; context
        features are renamed ``pos<offset>:<name>`` with the offset counted
        from the current item (so the -1 context becomes ``pos-2`` for the
        second item, and the +1 context is ``pos+2`` for the first item but
        ``pos+1`` for the last); and the last item contains ``pos-1:dur``
        with the value ``durmodel_utils.dur_function(5)``, i.e. the
        transformed duration of the item before it.
        """
        context = {
            -2: {("foo", 1), ("bar", 1), ("dur", 0.377)},
            -1: {("fii", 1), ("dur", 0.553)},
            +1: {("</s>", 1)},
        }
        local_feature_and_dur_seq = [
            ({("bii", 1), ("boo", 1)}, 5),
            ({("buu", 1), ("bee", 1)}, 6),
        ]
        result = durmodel_utils.compile_features_for_word(
            context, local_feature_and_dur_seq)

        self.assertEqual(result[0][1], 5)

        # Membership checks use assertIn: the assert_ alias is deprecated
        # and no longer exists in Python 3.12+.
        self.assertIn(("pos-2:foo", 1), result[0][0])
        self.assertIn(("bii", 1), result[0][0])
        self.assertIn(("pos-1:dur", 0.553), result[0][0])
        self.assertIn(("pos-2:fii", 1), result[1][0])
        self.assertIn(("pos+2:</s>", 1), result[-2][0])
        self.assertIn(("pos+1:</s>", 1), result[-1][0])

        self.assertIn(("pos-1:dur", durmodel_utils.dur_function(5)),
                      result[-1][0])
Ejemplo n.º 4
0
            # Compute context features for the lattice; the asserts below require
            # exactly one entry per element of features_and_durs and per lattice arc.
            contexts = durmodel_utils.get_context_features_and_durs(
                lat, features_and_durs)
            assert len(contexts) == len(features_and_durs)
            assert len(contexts) == len(lat.arcs)
            # i indexes lat.arcs for the current (context, local features) pair.
            # NOTE(review): only the increment in the filler-skip branch is visible
            # in this excerpt -- confirm i is also advanced at the end of the loop body.
            i = 0
            for (context,
                 local_feature_and_dur_seq) in zip(contexts,
                                                   features_and_durs):
                #print >> sys.stderr, word_list[lat.arcs[i].word_id]
                # Optionally drop arcs whose word is listed in filler_words.
                if args.skip_fillers:
                    if word_list[lat.arcs[i].word_id] in filler_words:
                        i += 1
                        continue

                #print >> sys.stderr, "Processing word %s" % word_list[lat.arcs[i].word_id]
                # Merge the word's context with its local features; the result is
                # iterated below as (phone_features, dur) pairs, one per phone.
                full_word_features = durmodel_utils.compile_features_for_word(
                    context, local_feature_and_dur_seq)
                num_phones = len(full_word_features)
                #print >> sys.stderr, lat.arcs[i].start_frame, lat.arcs[i].end_frame, word_list[lat.arcs[i].word_id].encode('utf-8')

                # One row per phone, one column per entry in feature_dict.
                context_matrix = np.zeros((num_phones, len(feature_dict)),
                                          dtype=theano.config.floatX)
                # Column vector holding the utterance's speaker id for every phone,
                # or all zeros when no utt2spkid mapping is available.
                # NOTE(review): np.int was a deprecated alias of the builtin int and
                # was removed in NumPy 1.24 -- this line fails on current NumPy.
                if utt2spkid:
                    speaker_vector = np.ones(
                        (num_phones, 1), dtype=np.int) * utt2spkid[lat.name]
                else:
                    speaker_vector = np.zeros((num_phones, 1), dtype=np.int)

                # Per-phone column vector; presumably the duration targets, filled
                # in by the loop below (its body is outside this excerpt).
                y = np.zeros((num_phones, 1), dtype=theano.config.floatX)
                for (j, (phone_features,
                         dur)) in enumerate(full_word_features):
                    #print >> sys.stderr, "  phone %d" % (j)
Ejemplo n.º 5
0
                # Build the local feature/duration sequence for this arc and collect
                # it, so features_and_durs ends up with one entry per processed arc.
                features_and_dur_seq = durmodel_utils.make_local(arc.start_frame, arc.word_id, arc.phone_ids, transitions, word_list,
                                                                 nonsilence_phonemes, language=args.language, stress_dict=stress_dict)
                features_and_durs.append(features_and_dur_seq)
            # Compute context features for the lattice; the asserts below require
            # exactly one entry per element of features_and_durs and per lattice arc.
            contexts = durmodel_utils.get_context_features_and_durs(lat, features_and_durs)
            assert len(contexts) == len(features_and_durs)
            assert len(contexts) == len(lat.arcs)
            # i indexes lat.arcs for the current (context, local features) pair.
            # NOTE(review): only the increment in the filler-skip branch is visible
            # in this excerpt -- confirm i is also advanced at the end of the loop body.
            i = 0
            for (context, local_feature_and_dur_seq) in zip(contexts, features_and_durs):
                #print >> sys.stderr, word_list[lat.arcs[i].word_id]
                # Optionally drop arcs whose word is listed in filler_words.
                if args.skip_fillers:
                    if word_list[lat.arcs[i].word_id] in filler_words:
                        i += 1
                        continue

                #print >> sys.stderr, "Processing word %s" % word_list[lat.arcs[i].word_id]
                # Merge the word's context with its local features; the result is
                # iterated below as (phone_features, dur) pairs, one per phone.
                full_word_features = durmodel_utils.compile_features_for_word(context, local_feature_and_dur_seq)
                num_phones = len(full_word_features)
                #print >> sys.stderr, lat.arcs[i].start_frame, lat.arcs[i].end_frame, word_list[lat.arcs[i].word_id].encode('utf-8')

                # One row per phone, one column per entry in feature_dict.
                context_matrix = np.zeros((num_phones, len(feature_dict)), dtype=theano.config.floatX)
                # Column vector holding the utterance's speaker id for every phone,
                # or all zeros when no utt2spkid mapping is available.
                # NOTE(review): np.int was a deprecated alias of the builtin int and
                # was removed in NumPy 1.24 -- this line fails on current NumPy.
                if utt2spkid:
                    speaker_vector = np.ones((num_phones, 1), dtype=np.int) * utt2spkid[lat.name]
                else:
                    speaker_vector = np.zeros((num_phones, 1), dtype=np.int)

                # Per-phone column vector; presumably the duration targets
                # (its assignment is outside this excerpt).
                y = np.zeros((num_phones, 1), dtype=theano.config.floatX)
                for (j, (phone_features, dur)) in enumerate(full_word_features):
                    #print >> sys.stderr, "  phone %d" % (j)
                    for (feature_name, value) in phone_features:
                        # Names missing from feature_dict map to -1, which fails
                        # the >= 0 test below.
                        feature_id = feature_dict.get(feature_name, -1)
                        if feature_id >= 0: