def process_utterance(self, utt, make_label=True):
    """
    Dump one HTK-style context label line per (node, state) pair for *utt*.

    For every node matched by the configured ``target_nodes`` XPath, build its
    full-context label, then expand it into one line per HTK state
    (states numbered from 2, as in HTK/HTS model files), optionally prefixed
    with start/end times when the states carry timing attributes.

    :param utt: utterance object supporting ``xpath`` and ``get_filename``
    :param make_label: if True, also write the label lines to the utterance's
        label file (via ``writelist``)
    :return: tuple ``(utt_data, utt_questions)`` where ``utt_data`` is the list
        of label lines and ``utt_questions`` maps question keys to summed counts
        accumulated over all nodes
    """
    utt_data = []
    utt_questions = defaultdict(int)
    nodelist = utt.xpath(self.config["target_nodes"])
    if not nodelist:
        print('WARNING: FeatureDumper\'s target_nodes matches no nodes: %s' % (
            self.config["target_nodes"]))
    for node in nodelist:
        ## Reset per-node xpaths: state index and times are appended here,
        ## per state, not inside get_node_context_label (which is per phone).
        self.htk_state_xpath = None
        self.start_time_xpath = None
        self.end_time_xpath = None
        node_data, node_questions = self.get_node_context_label(node)
        statelist = node.xpath('.//' + self.state_tag)
        assert statelist != []
        for (i, state) in enumerate(statelist):
            state_ix = i + 2  ## HTK numbers emitting states from 2
            state_node_data = "%s[%s]" % (node_data, state_ix)
            ## Times are absent at runtime (synthesis) -- attrib.get gives _NA_:
            start_time = state.attrib.get(self.start_attribute, '_NA_')
            end_time = state.attrib.get(self.end_attribute, '_NA_')
            if not (start_time == "_NA_" or end_time == "_NA_"):
                ## str.ljust replaces the Python-2-only string.ljust;
                ## pad to 10 chars so the time columns line up.
                start_time = str(ms_to_htk(start_time)).ljust(10)
                end_time = str(ms_to_htk(end_time)).ljust(10)
                state_node_data = "%s %s %s" % (start_time, end_time,
                                                state_node_data)
            utt_data.append(state_node_data)
        ## Sum the per-node question counts into the utterance totals
        ## (dict.update would overwrite, not add):
        for question in node_questions:
            utt_questions[question] += node_questions[question]
    if make_label:
        label_file = utt.get_filename(self.config["output_filetype"])
        writelist(utt_data, label_file, uni=True)
    ## for writing utterance-level labels
    return (utt_data, utt_questions)
def get_node_context_label(self, node):
    """
    Build a formatted full-context label string for *node*, applying the
    configured per-feature mappers (which may expand one raw context value
    into several mapped values, e.g. one column per phonetic feature).

    :param node: utterance node supporting ``get_context_vector`` and
        ``safe_xpath``
    :return: the formatted context label string
    """
    context_vector = node.get_context_vector(self.contexts)
    ## Expand each raw context value through its mapper, if one is
    ## configured for that feature index; otherwise pass the value through:
    mapped_context_vector = []
    for (i, (name, value)) in enumerate(context_vector):
        if i in self.mappers:
            mapped_values = self.mappers[i].lookup(value)
            mapped_context_vector.extend(mapped_values)
        else:
            mapped_context_vector.append(value)
    ## add numbers:
    assert len(mapped_context_vector) == self.number_of_features
    context_vector = zip(range(self.number_of_features),
                         self.mapped_feature_names,
                         mapped_context_vector)
    # At this point, context_vector yields triples like:
    #   (0, u'll_segment:cmanner=affric', 0.0), (1, u'll_segment:cmanner=approx', 0.0), ...
    if self.context_separators == "numbers":
        formatted_context_vector = ["%s:%s" % (number, value)
                                    for (number, name, value) in context_vector]
        formatted_context_vector = "/".join(formatted_context_vector)
        formatted_context_vector = "/" + formatted_context_vector + "/"
    else:
        if self.context_separators == "spaces":
            separator = " "
        elif self.context_separators == "commas":
            separator = ","
        else:
            sys.exit("'%s' not a recognised separator" % (self.context_separators))
        formatted_context_vector = [str(value)
                                    for (number, name, value) in context_vector]
        formatted_context_vector = separator.join(formatted_context_vector)
    if self.htk_monophone_xpath:
        ## PREpend an extra monophone feature -- appending will screw up
        ## extraction of sentence level contexts, which (currently) are
        ## assumed to be at the end of the model name:
        htk_monophone = node.safe_xpath(self.htk_monophone_xpath)
        formatted_context_vector = "-%s+%s" % (htk_monophone,
                                               formatted_context_vector)
        ## Don't need to add this to context questions -- just used for
        ## extracting monophones, not context clustering.
        ## TODO: find a neater way to handle this? Don't rely on HTK's
        ## inbuilt monophone extractor in the HTS-Training script?
    if self.htk_state_xpath:
        ## Increment to start state count at 2 as in HTK
        htk_state = node.safe_xpath(self.htk_state_xpath)
        formatted_context_vector = "%s[%s]" % (formatted_context_vector,
                                               htk_state + 1)
    if self.start_time_xpath and self.end_time_xpath:
        start_time = node.safe_xpath(self.start_time_xpath)
        end_time = node.safe_xpath(self.end_time_xpath)
        ## safe_xpath will give _NA_ when times are absent (i.e at runtime) --
        ## in this case, omit times:
        if not (start_time == "_NA_" or end_time == "_NA_"):
            ## str.ljust replaces the Python-2-only string.ljust:
            start_time = str(ms_to_htk(start_time)).ljust(10)
            end_time = str(ms_to_htk(end_time)).ljust(10)
            formatted_context_vector = "%s %s %s" % (start_time, end_time,
                                                     formatted_context_vector)
    return formatted_context_vector
def get_node_context_label(self, node):
    """
    Produce a full-context label for one node (e.g. a segment node) of an
    Utterance object, together with the question triplets observed.

    :param node: a segment-level node supporting ``get_context_vector`` and
        ``safe_xpath``
    :return: tuple ``(formatted_context_vector, node_questions)`` -- the label
        string and a defaultdict counting each (number, name, value) triplet
    """
    context_vector = node.get_context_vector(self.contexts)
    ## add numbers:
    context_vector = [(number, name, value) for
                      (number, (name, value)) in enumerate(context_vector)]
    node_questions = defaultdict(int)
    for triplet in context_vector:
        ## store question's triplet as a key -- sort later
        ## Count can be used for filtering infreq. questions
        node_questions[triplet] += 1
    if self.context_separators == "numbers":
        formatted_context_vector = ["%s:%s" % (number, value)
                                    for (number, name, value) in context_vector]
        formatted_context_vector = "/".join(formatted_context_vector)
        formatted_context_vector = "/" + formatted_context_vector + "/"
    else:
        if self.context_separators == "spaces":
            separator = " "
        elif self.context_separators == "commas":
            separator = ","
        else:
            sys.exit("'%s' not a recognised separator" % (self.context_separators))
        formatted_context_vector = [str(value)
                                    for (number, name, value) in context_vector]
        formatted_context_vector = separator.join(formatted_context_vector)
    if self.htk_monophone_xpath:
        ## PREpend an extra monophone feature -- appending will screw up
        ## extraction of sentence level contexts, which (currently) are
        ## assumed to be at the end of the model name:
        htk_monophone = node.safe_xpath(self.htk_monophone_xpath)
        formatted_context_vector = "-%s+%s" % (htk_monophone,
                                               formatted_context_vector)
        ## Don't need to add this to context questions -- just used for
        ## extracting monophones, not context clustering.
        ## TODO: find a neater way to handle this? Don't rely on HTK's
        ## inbuilt monophone extractor in the HTS-Training script?
    if self.htk_state_xpath:
        ## Increment to start state count at 2 as in HTK
        htk_state = node.safe_xpath(self.htk_state_xpath)
        formatted_context_vector = "%s[%s]" % (formatted_context_vector,
                                               htk_state + 1)
    if self.start_time_xpath and self.end_time_xpath:
        start_time = node.safe_xpath(self.start_time_xpath)
        end_time = node.safe_xpath(self.end_time_xpath)
        ## safe_xpath will give _NA_ when times are absent (i.e at runtime) --
        ## in this case, omit times:
        if not (start_time == "_NA_" or end_time == "_NA_"):
            ## str.ljust replaces the Python-2-only string.ljust:
            start_time = str(ms_to_htk(start_time)).ljust(10)
            end_time = str(ms_to_htk(end_time)).ljust(10)
            formatted_context_vector = "%s %s %s" % (start_time, end_time,
                                                     formatted_context_vector)
    return (formatted_context_vector, node_questions)