Ejemplo n.º 1
0
def _finish_example(text, entities, label_line):
    """Build one ``Example`` from a finished sentence.

    ``text`` is the space-joined sentence, ``entities`` the entities collected
    for it and ``label_line`` the raw line read from the label file (anything
    after the first newline is ignored before it is passed to
    ``transform_intent``).
    """
    ex = Example(None, None, [])
    ex.text = text
    ex.intent = transform_intent(label_line.split("\n")[0])
    for ent in entities:
        # Entity offsets are resolved against the complete sentence text.
        ent.find_start_end(ex.text)
        ex.entities.append(ent)
    return ex


def run(train_file, label_file, flag):
    """Convert a tagged token file plus a label file into a JSON data file.

    ``train_file`` holds one ``word<TAB>tag`` pair per line, with a blank line
    terminating each sentence.  Tags are ``O``, or start with ``B`` / ``I``
    followed by a two-character separator and the entity name (``tag[2:]``).
    ``label_file`` is read one line per finished sentence; that line is turned
    into the example's intent via ``transform_intent``.

    The collected examples, followed by the entries loaded from
    ``basic_intents.json``, are written under
    ``rasa_nlu_data.common_examples`` to ``exact.json`` (``flag == 0``) or
    ``test.json`` (``flag == 1``).

    Raises:
        ValueError: if a non-blank line does not contain exactly one tab.
        IndexError: if ``flag`` is not a valid index into the output names.
    """
    examples = []
    text = ""
    entities = []
    cur_entity = Entity(0, 0, None, None)

    with open(train_file, "r") as tr, open(label_file, "r") as lb:
        # Iterate lazily instead of materialising the file with readlines().
        for line in tr:
            if line == "\n":
                # Sentence boundary: flush the entity still being built.
                if cur_entity.to_add:
                    entities.append(cur_entity)
                examples.append(
                    _finish_example(text, entities, lb.readline())
                )
                text = ""
                entities = []
                cur_entity = Entity(0, 0, None, None)
                continue

            word, tag = line.split("\t")
            text = word if text == "" else text + " " + word
            tag = tag.split("\n")[0]

            if tag == "O":
                if cur_entity.entity is not None:
                    entities.append(cur_entity)
                    cur_entity = Entity(0, 0, None, None)
            elif tag.startswith("B"):
                if cur_entity.entity is not None:
                    # A new entity starts right after another one.
                    entities.append(cur_entity)
                    cur_entity = Entity(0, 0, word, tag[2:])
                else:
                    cur_entity.entity = tag[2:]
                    cur_entity.value = word
                # Mark the entity as pending on both paths, so that one
                # created through the constructor is not lost when the
                # sentence ends immediately after it.
                cur_entity.to_add = True
            elif tag.startswith("I"):
                # Continuation tokens only extend an entity of the same name.
                if cur_entity.entity == tag[2:]:
                    cur_entity.value = cur_entity.value + " " + word
                    cur_entity.to_add = True

        # The file may end without a trailing blank line; emit the last
        # sentence instead of dropping it, provided a label is still left.
        if text != "":
            label_line = lb.readline()
            if label_line:
                if cur_entity.to_add:
                    entities.append(cur_entity)
                examples.append(_finish_example(text, entities, label_line))

    final_json = {
        "rasa_nlu_data": {
            "common_examples": [example.get_json() for example in examples],
            "entity_examples": [],
            "intent_examples": []
        }
    }
    file_names = ["exact.json", "test.json"]
    with open("basic_intents.json") as f:
        # Hand-written examples are appended after the converted ones.
        final_json["rasa_nlu_data"]["common_examples"].extend(json.load(f))

    with open(file_names[flag], "w") as j:
        json.dump(final_json, j)