def turn_to_examples(t, ontology, tokenizer):
    """Build one classification example per (slot, value) pair in the ontology.

    The user utterance is taken from ``t.transcript`` (joined with spaces when
    it is a list) unless ``t.asr`` is non-empty, in which case ``t.asr[0][0]``
    is used instead (presumably the top ASR hypothesis -- confirm against the
    data format).

    Args:
        t: Dialogue turn exposing ``transcript``, ``asr``,
            ``system_transcript`` and ``turn_label``.
        ontology: Object exposing ``slots`` and a ``values`` mapping indexed
            by slot.
        tokenizer: Object providing ``tokenize`` and
            ``convert_tokens_to_ids``.

    Returns:
        A list of ``(slot, value, input_ids, token_type_ids, label)`` tuples,
        where ``label`` is 1 when ``(slot, value)`` occurs in ``t.turn_label``
        and 0 otherwise.
    """
    utterance = t.transcript
    if isinstance(utterance, list):
        utterance = ' '.join(utterance)
    # A non-empty ASR result overrides the transcript.
    if len(t.asr) > 0:
        utterance = t.asr[0][0]

    dialogue_context = ' '.join((t.system_transcript, SEP, utterance))
    gold_pairs = {(s, v) for s, v in t.turn_label}

    # Lazily enumerate every candidate pair in ontology order.
    candidate_pairs = (
        (slot, value)
        for slot in ontology.slots
        for value in ontology.values[slot]
    )

    results = []
    for slot, value in candidate_pairs:
        hypothesis = ' = '.join((slot, value))

        # Token ids for "CLS context SEP candidate SEP".
        tokens = tokenizer.tokenize(
            ' '.join((CLS, dialogue_context, SEP, hypothesis, SEP))
        )
        token_ids = tokenizer.convert_tokens_to_ids(tokens)

        # Segment 0 runs up to and including the SEP located by rindex in all
        # tokens but the final one (rindex presumably returns the index of the
        # last occurrence); every remaining token belongs to segment 1.
        boundary = rindex(tokens[:-1], SEP) + 1
        segment_ids = [0] * boundary + [1] * (len(tokens) - boundary)

        is_gold = 1 if (slot, value) in gold_pairs else 0
        results.append((slot, value, token_ids, segment_ids, is_gold))
    return results
def get_author_if_compilation(article):
    """Return the author of a compilation article, or NaN for a standalone one.

    ``is_compilation(article)`` is expected to return ``False`` for standalone
    articles and otherwise a text value from which the author is extracted.

    Args:
        article: The article passed through to ``is_compilation``.

    Returns:
        ``(author, "compilation")`` where ``author`` is the space-joined words
        following the "by" located by ``rindex`` (presumably the last
        occurrence), or ``(np.nan, "standalone")`` when ``is_compilation``
        returns ``False``.
    """
    compilation = is_compilation(article)
    if compilation is False:
        return np.nan, "standalone"

    # Matches markup tags and HTML character entities so they can be stripped.
    markup = re.compile("<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});")
    words = str(markup.sub("", compilation)).split()

    by_position = rindex(words, "by")
    author = " ".join(words[by_position + 1:])
    return author, "compilation"
def claim_child(self, child):
    """Make ``self`` the parent of ``child``.

    If ``child`` already has a parent, it is first removed from that parent's
    ``children`` at the position reported by ``rindex`` (presumably the last
    matching entry). It is then appended to ``self.children``.

    Args:
        child: Node exposing a ``parent`` attribute.
    """
    former_parent = child.parent
    if former_parent is not None:
        siblings = former_parent.children
        del siblings[rindex(siblings, child)]
    child.parent = self
    self.children.append(child)
def claim_child(self, child):
    """Re-parent ``child`` under ``self``.

    When ``child.parent`` is set, the child is deleted from the old parent's
    ``children`` at the index returned by ``rindex`` (presumably the index of
    its last occurrence). The child's ``parent`` is then pointed at ``self``
    and the child is appended to ``self.children``.

    Args:
        child: Node exposing a ``parent`` attribute.
    """
    old_parent = child.parent
    if old_parent is not None:
        position = rindex(old_parent.children, child)
        del old_parent.children[position]
    child.parent = self
    self.children.append(child)