## Example #1
    def process_utterance(self, utt, make_label=True):
        """
        Dump one HTK-style context label per (node, state) pair in *utt*.

        :param utt: utterance object; queried via xpath for the configured
            target nodes (e.g. segments).
        :param make_label: when True, also write the collected labels to the
            utterance's output file via ``writelist``.
        :return: tuple ``(utt_data, utt_questions)`` where ``utt_data`` is the
            list of label strings and ``utt_questions`` maps each question key
            to its occurrence count summed over all nodes.
        """
        utt_data = []
        utt_questions = defaultdict(int)

        nodelist = utt.xpath(self.config["target_nodes"])
        if not nodelist:
            print(
                'WARNING: FeatureDumper\'s target_nodes matches no nodes: %s' %
                (self.config["target_nodes"]))

        for node in nodelist:

            ## Reset per-node xpath state so plain phone-level labels are
            ## produced by get_node_context_label:
            self.htk_state_xpath = None
            self.start_time_xpath = None
            self.end_time_xpath = None

            node_data, node_questions = self.get_node_context_label(node)

            statelist = node.xpath('.//' + self.state_tag)
            assert statelist != []
            for (i, state) in enumerate(statelist):

                state_ix = i + 2  ## HTK numbers emitting states from 2
                state_node_data = "%s[%s]" % (node_data, state_ix)

                start_time = state.attrib.get(self.start_attribute,
                                              '_NA_')  ## no time at runtime!
                end_time = state.attrib.get(self.end_attribute, '_NA_')

                ## Omit times when either is absent (i.e. at runtime):
                if not (start_time == "_NA_" or end_time == "_NA_"):
                    ## str.ljust replaces Python-2-only string.ljust, which
                    ## was removed from the string module in Python 3:
                    start_time = str(ms_to_htk(start_time)).ljust(10)
                    end_time = str(ms_to_htk(end_time)).ljust(10)

                    state_node_data = "%s %s %s" % (start_time, end_time,
                                                    state_node_data)

                utt_data.append(state_node_data)

            ## Sum the per-node question counts into the utterance totals:
            for question in node_questions:
                utt_questions[question] += node_questions[question]

        if make_label:
            label_file = utt.get_filename(self.config["output_filetype"])
            writelist(utt_data, label_file, uni=True)

        return (utt_data, utt_questions)  ## for writing utterance-level labels
## Example #2
    def get_node_context_label(self, node):
        """
        Build a single context label string for *node*.

        The node's raw context vector is looked up, individual features are
        optionally expanded through per-feature mappers, and the values are
        joined with the configured separator style.  Optional monophone,
        HTK-state and timing decorations are then applied.

        :param node: utterance node supporting ``get_context_vector`` and
            ``safe_xpath``.
        :return: the formatted context label string.
        """
        context_vector = node.get_context_vector(self.contexts)

        ## Expand mapped features; pass unmapped values straight through:
        mapped_context_vector = []
        for (i, (name, value)) in enumerate(context_vector):
            if i in self.mappers:
                mapped_context_vector.extend(self.mappers[i].lookup(value))
            else:
                mapped_context_vector.append(value)

        assert len(mapped_context_vector) == self.number_of_features

        ## Attach feature indices and names.  Materialise with list() --
        ## zip() is a one-shot iterator on Python 3:
        context_vector = list(zip(range(self.number_of_features),
                                  self.mapped_feature_names,
                                  mapped_context_vector))

        ## At this point each entry looks like e.g.
        ## (4, u'll_segment:cmanner=nasal', 1.0)
        if self.context_separators == "numbers":
            formatted_context_vector = ["%s:%s" % (number, value)
                                        for (number, name, value) in context_vector]
            formatted_context_vector = "/" + "/".join(formatted_context_vector) + "/"
        else:
            if self.context_separators == "spaces":
                separator = " "
            elif self.context_separators == "commas":
                separator = ","
            else:
                sys.exit("'%s' not a recognised separator" % (self.context_separators))

            formatted_context_vector = separator.join(
                str(value) for (number, name, value) in context_vector)

        if self.htk_monophone_xpath:
            ## PREpend an extra monophone feature -- appending would screw up
            ## extraction of sentence level contexts, which (currently) are
            ## assumed to be at the end of the model name:
            htk_monophone = node.safe_xpath(self.htk_monophone_xpath)
            formatted_context_vector = "-%s+%s" % (htk_monophone, formatted_context_vector)
            ## Don't need to add this to context questions -- just used for
            ## extracting monophones, not context clustering.
            ## TODO: find a neater way to handle this? Don't rely on HTK's
            ## inbuilt monophone extractor in the HTS-Training script?

        if self.htk_state_xpath:
            ## Increment so state numbering starts at 2, as in HTK:
            htk_state = node.safe_xpath(self.htk_state_xpath)
            formatted_context_vector = "%s[%s]" % (formatted_context_vector, htk_state + 1)

        if self.start_time_xpath and self.end_time_xpath:
            start_time = node.safe_xpath(self.start_time_xpath)
            end_time = node.safe_xpath(self.end_time_xpath)

            ## safe_xpath will give _NA_ when times are absent (i.e at runtime) --
            ## in this case, omit times:
            if not (start_time == "_NA_" or end_time == "_NA_"):
                ## str.ljust replaces Python-2-only string.ljust, which was
                ## removed from the string module in Python 3:
                start_time = str(ms_to_htk(start_time)).ljust(10)
                end_time = str(ms_to_htk(end_time)).ljust(10)

                formatted_context_vector = "%s %s %s" % (start_time, end_time, formatted_context_vector)
        return formatted_context_vector
## Example #3
    def get_node_context_label(self, node):
        """
        Produce a context label for one node (e.g. a segment node) of an
        Utterance object.

        :param node: segment node supporting ``get_context_vector`` and
            ``safe_xpath``.
        :return: tuple ``(formatted_context_vector, node_questions)`` -- the
            context label string, plus a count of each
            ``(number, name, value)`` question triplet observed.
        """
        context_vector = node.get_context_vector(self.contexts)
        ## Attach feature indices:
        context_vector = [(number, name, value) for
                          (number, (name, value)) in enumerate(context_vector)]

        node_questions = defaultdict(int)
        for triplet in context_vector:
            ## Store question's triplet as a key -- sort later.  The count can
            ## be used for filtering infrequent questions.
            node_questions[triplet] += 1

        if self.context_separators == "numbers":
            formatted_context_vector = ["%s:%s" % (number, value)
                                        for (number, name, value) in context_vector]
            formatted_context_vector = "/" + "/".join(formatted_context_vector) + "/"
        else:
            if self.context_separators == "spaces":
                separator = " "
            elif self.context_separators == "commas":
                separator = ","
            else:
                sys.exit("'%s' not a recognised separator" % (self.context_separators))

            formatted_context_vector = separator.join(
                str(value) for (number, name, value) in context_vector)

        if self.htk_monophone_xpath:
            ## PREpend an extra monophone feature -- appending would screw up
            ## extraction of sentence level contexts, which (currently) are
            ## assumed to be at the end of the model name:
            htk_monophone = node.safe_xpath(self.htk_monophone_xpath)
            formatted_context_vector = "-%s+%s" % (htk_monophone, formatted_context_vector)
            ## Don't need to add this to context questions -- just used for
            ## extracting monophones, not context clustering.
            ## TODO: find a neater way to handle this? Don't rely on HTK's
            ## inbuilt monophone extractor in the HTS-Training script?

        if self.htk_state_xpath:
            ## Increment so state numbering starts at 2, as in HTK:
            htk_state = node.safe_xpath(self.htk_state_xpath)
            formatted_context_vector = "%s[%s]" % (formatted_context_vector, htk_state + 1)

        if self.start_time_xpath and self.end_time_xpath:
            start_time = node.safe_xpath(self.start_time_xpath)
            end_time = node.safe_xpath(self.end_time_xpath)

            ## safe_xpath will give _NA_ when times are absent (i.e at runtime) --
            ## in this case, omit times:
            if not (start_time == "_NA_" or end_time == "_NA_"):
                ## str.ljust replaces Python-2-only string.ljust, which was
                ## removed from the string module in Python 3:
                start_time = str(ms_to_htk(start_time)).ljust(10)
                end_time = str(ms_to_htk(end_time)).ljust(10)

                formatted_context_vector = "%s %s %s" % (start_time, end_time, formatted_context_vector)
        return (formatted_context_vector, node_questions)