Beispiel #1
0
def turn_to_examples(t, ontology, tokenizer):
    """Expand one dialogue turn into a classification example per candidate.

    For every ``(slot, value)`` pair listed in ``ontology`` an input sequence
    of the form ``CLS <system> SEP <user> SEP <slot = value> SEP`` is
    tokenized and converted to ids.

    Args:
        t: Turn object; ``transcript``, ``asr``, ``system_transcript`` and
            ``turn_label`` are read from it.
        ontology: Object exposing ``slots`` (iterable) and ``values``
            (indexable by slot).
        tokenizer: Object providing ``tokenize`` and
            ``convert_tokens_to_ids``.

    Returns:
        A list of ``(slot, value, input_ids, token_type_ids, label)`` tuples,
        where ``label`` is 1 if ``(slot, value)`` appears in
        ``t.turn_label`` and 0 otherwise.
    """
    # Pick the user utterance: a list transcript is joined with spaces, and
    # t.asr[0][0] overrides it whenever t.asr is non-empty.
    utterance = t.transcript
    if isinstance(utterance, list):
        utterance = ' '.join(utterance)
    if len(t.asr) > 0:
        utterance = t.asr[0][0]

    context = ' '.join([t.system_transcript, SEP, utterance])
    gold_pairs = {(s, v) for s, v in t.turn_label}

    examples = []
    for slot in ontology.slots:
        for value in ontology.values[slot]:
            candidate = slot + ' = ' + value

            # Token ids for the full "context / candidate" sequence.
            tokens = tokenizer.tokenize(
                ' '.join([CLS, context, SEP, candidate, SEP]))
            input_ids = tokenizer.convert_tokens_to_ids(tokens)

            # Segment ids: the trailing token is excluded from the search so
            # the split falls on an earlier SEP. NOTE(review): rindex is
            # defined elsewhere; presumably it returns the index of the last
            # occurrence -- confirm.
            first_len = rindex(tokens[:-1], SEP) + 1
            second_len = len(tokens) - first_len
            token_type_ids = [0] * first_len + [1] * second_len

            # Binary target: is this candidate part of the turn label?
            label = int((slot, value) in gold_pairs)

            examples.append((slot, value, input_ids, token_type_ids, label))
    return examples
Beispiel #2
0
def get_author_if_compilation(article):
    """Return ``(author, kind)`` for an article.

    ``is_compilation(article)`` decides the branch: when it returns ``False``
    the result is ``(np.nan, "standalone")``. Otherwise its return value is
    stripped of HTML-like tags and character entities, split on whitespace,
    and everything after the position given by ``rindex(words, "by")`` is
    joined into the author string, returned with ``"compilation"``.

    NOTE(review): ``rindex`` and ``is_compilation`` are defined elsewhere;
    ``rindex`` presumably yields the index of the last ``"by"`` -- confirm,
    including what happens when ``"by"`` is absent.
    """
    compilation = is_compilation(article)
    if compilation is False:
        return np.nan, "standalone"

    # Matches tags such as <b> and entities such as &amp; / &#123; / &#x1f;
    markup = re.compile("<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});")
    words = str(markup.sub("", compilation)).split()
    author_start = rindex(words, "by") + 1
    return " ".join(words[author_start:]), "compilation"
Beispiel #3
0
 def claim_child(self, child):
     """Make ``self`` the parent of ``child``.

     If ``child`` already has a parent, it is first removed from that
     parent's ``children`` list; it is then appended to ``self.children``.
     """
     previous_parent = child.parent
     if previous_parent is not None:
         # NOTE(review): rindex is defined elsewhere; presumably it returns
         # the index of the last occurrence of child -- confirm.
         siblings = previous_parent.children
         position = rindex(siblings, child)
         del previous_parent.children[position]
     child.parent = self
     self.children.append(child)
Beispiel #4
0
 def claim_child(self, child):
     """Adopt ``child`` as a child of ``self``.

     A child that currently has a parent is detached from that parent's
     ``children`` list before being re-parented; afterwards ``child.parent``
     is ``self`` and ``child`` is the last entry of ``self.children``.
     """
     old_owner = child.parent
     if old_owner is not None:
         # NOTE(review): rindex comes from outside this snippet; presumably
         # it locates the last occurrence of child in the list -- confirm.
         slot = rindex(old_owner.children, child)
         del old_owner.children[slot]
     child.parent = self
     self.children.append(child)