Esempio n. 1
0
    def process_utterance(self, utt):
        """Split the configured attribute of each target node into child elements.

        For every node matched by ``self.target_nodes``, the value of
        ``self.split_attribute`` is chunked via ``self.splitting_function`` and
        each chunk becomes a new child node, optionally annotated with a token
        class and a safetext form.
        """
        for node in utt.xpath(self.target_nodes):
            assert node.has_attribute(self.split_attribute)
            text = node.get(self.split_attribute)

            for piece in self.splitting_function(text):
                new_node = Element(self.child_node_type)
                new_node.set(self.split_attribute, piece)

                if self.add_token_classes:
                    new_node.set(self.class_attribute, self.classify_token(piece))

                if self.add_safetext:
                    new_node.set(self.safetext_attribute, self.safetext_token(piece))

                node.add_child(new_node)
Esempio n. 2
0
    def process_utterance(self, utt):
        """Normalise then split each target node's attribute into child elements.

        The raw attribute value is passed through ``self.normalise_ice`` before
        being chunked by ``self.splitting_function``; each chunk becomes a new
        child node, optionally annotated with a token class and safetext form.
        """
        for node in utt.xpath(self.target_nodes):
            assert node.has_attribute(self.split_attribute)
            raw_text = node.get(self.split_attribute)
            pieces = self.splitting_function(self.normalise_ice(raw_text))

            for piece in pieces:
                new_child = Element(self.child_node_type)
                new_child.set(self.split_attribute, piece)

                if self.add_token_classes:
                    new_child.set(self.class_attribute, self.classify_token(piece))

                if self.add_safetext:
                    new_child.set(self.safetext_attribute, self.safetext_token(piece))

                node.add_child(new_child)
Esempio n. 3
0
    def process_utterance(self, utt):
        """Attach pronunciation structure or pause segments to classified nodes.

        For nodes whose class is in ``self.word_classes``, a pronunciation is
        looked up (optionally POS-aware) and full syllable/segment structure is
        added; for probable/possible pause classes a single pause segment child
        is added. Any other class aborts the run.
        """
        for node in utt.xpath(self.target_nodes):
            assert node.has_attribute(self.class_attribute)
            assert node.has_attribute(self.target_attribute)

            current_class = node.attrib[self.class_attribute]

            if current_class in self.word_classes:
                word = node.attrib[self.target_attribute]
                # POS may be absent on the node; the lookup then falls back to None.
                pos = node.attrib.get(self.part_of_speech_attribute, None)
                (pronunciation, method) = self.get_phonetic_segments(word, part_of_speech=pos)
                # Record which lookup method produced the pronunciation.
                node.set('phones_from', method)
                NodeProcessors.add_syllable_structure(
                    node, pronunciation, syllable_delimiter='|', syllable_tag='syllable',
                    phone_tag='segment', pronunciation_attribute='pronunciation',
                    stress_attribute='stress')
            elif current_class in self.probable_pause_classes:
                child = Element('segment')
                child.set('pronunciation', c.PROB_PAUSE)
                node.add_child(child)
            elif current_class in self.possible_pause_classes:
                child = Element('segment')
                child.set('pronunciation', c.POSS_PAUSE)
                node.add_child(child)
            else:
                # BUG FIX: the original message contained a %s placeholder but
                # never interpolated the offending class name.
                sys.exit('Class "%s" not in any of word_classes, probable_pause_classes, possible_pause_classes'
                         % current_class)
Esempio n. 4
0
    def process_utterance(self, utt):
        """Normalise each target node's text, split it, and add token children.

        Text is run through ``self.normalizer.normalize`` before being chunked
        by ``self.splitting_function``; each chunk becomes a child node,
        optionally annotated with a token class and a safetext form.
        """
        print('target nodes: %s' % (utt.xpath(self.target_nodes)))
        node_count = 0
        for node in utt.xpath(self.target_nodes):
            node_count += 1
            assert node.has_attribute(self.split_attribute)
            text_to_normalise = node.get(self.split_attribute)
            print(text_to_normalise)
            # The normalizer already tokenizes; split on its output, not the raw text.
            pieces = self.splitting_function(self.normalizer.normalize(text_to_normalise))

            for piece in pieces:
                fresh = Element(self.child_node_type)
                fresh.set(self.split_attribute, piece)

                if self.add_token_classes:
                    fresh.set(self.class_attribute, self.classify_token(piece))

                if self.add_safetext:
                    fresh.set(self.safetext_attribute, self.safetext_token(piece))

                node.add_child(fresh)
Esempio n. 5
0
    def process_utterance(self, utt):
        """Expand classified nodes into pronunciation or pause children.

        Word-class nodes are expanded to their phonetic segments; probable and
        possible pause classes become a single pause child. Any other class
        aborts the run. Each resulting chunk is written to
        ``self.output_attribute`` of a new child node.
        """
        for node in utt.xpath(self.target_nodes):
            assert node.has_attribute(self.class_attribute)
            assert node.has_attribute(self.target_attribute)

            current_class = node.attrib[self.class_attribute]

            if current_class in self.word_classes:
                word = node.attrib[self.target_attribute]
                children = self.get_phonetic_segments(word)
            elif current_class in self.probable_pause_classes:
                children = [c.PROB_PAUSE]
            elif current_class in self.possible_pause_classes:
                children = [c.POSS_PAUSE]
            else:
                # BUG FIX: the original message contained a %s placeholder but
                # never interpolated the offending class name.
                sys.exit('Class "%s" not in any of word_classes, probable_pause_classes, possible_pause_classes'
                         % current_class)
            for chunk in children:
                child = Element(self.child_node_type)
                child.set(self.output_attribute, chunk)
                node.add_child(child)
Esempio n. 6
0
    def process_utterance(self, utt):
        """Split each target node's attribute into children, skipping "_END_" chunks.

        BUG FIX: the original body mixed literal tab characters with spaces for
        indentation (lines around the "_END_" check and the safetext set),
        which raises TabError under Python 3; re-indented consistently with
        4-space indents. Behavior is otherwise unchanged.
        """
        for node in utt.xpath(self.target_nodes):
            assert node.has_attribute(self.split_attribute)
            to_split = node.get(self.split_attribute)

            child_chunks = self.splitting_function(to_split)

            for chunk in child_chunks:
                # "_END_" chunks are skipped — presumably a sentinel emitted by
                # the splitting function; confirm against its implementation.
                if chunk == "_END_":
                    continue
                child = Element(self.child_node_type)
                child.set(self.split_attribute, chunk)

                if self.add_token_classes:
                    child.set(self.class_attribute, self.classify_token(chunk))

                if self.add_safetext:
                    child.set(self.safetext_attribute, self.safetext_token(chunk))

                node.add_child(child)