Example #1
0
def build_question(corpus, index, word_dict):
    """Return the context/question tokens at *index* within *corpus*.

    Parameters
    ----------
    corpus : the raw corpus accepted by
        ``preprocessor.get_sequence_tokens_with_turn``.
    index : int
        Position of the wanted question in the tokenized data.
    word_dict : the vocabulary mapping used for tokenization.

    Returns
    -------
    The ``'c'`` (context/question) entry at *index*, or ``None`` after
    printing a message when *index* is out of range.
    """
    all_data_token = preprocessor.get_sequence_tokens_with_turn(
        corpus, word_dict)
    data_length = len(all_data_token['y'])
    if index < 0 or index >= data_length:
        print("Index is out of the range")
        return None
    # Reuse the tokens computed above — the original re-tokenized the
    # entire corpus a second time with an identical call just to read
    # one entry.
    return all_data_token['c'][index]
Example #2
0
def dam_output(input, SINGLEMODEL):
    """Answer the question *input* with the DAM model.

    Parameters
    ----------
    input : str
        The question text. (The name shadows the builtin ``input``; kept
        unchanged for backward compatibility with existing callers.)
    SINGLEMODEL : int
        ``1`` -> DAM builds its own candidate answers;
        anything else -> candidates come from the BiLSTM pipeline.

    Returns
    -------
    The selected answer for the question.
    """
    # Build the DAM network from the module-level configuration.
    model = net.Net(conf)

    # If no bilstm, work out the proposed answers itself; otherwise, get
    # the proposed answers from bilstm.
    if SINGLEMODEL == 1:
        key_words_list = ["input classification", "output", "context"]
        cls_indexs, question_text, answers_text, word_dict = prepare_data(data_path)
        # Locate the question in the corpus.  Initialize ``number`` so an
        # empty corpus cannot raise NameError; when no exact match exists
        # the last index is used (pre-existing behavior of the loop).
        number = 0
        for number, question in enumerate(question_text):
            if question == input:
                break
        question_number = [number]
        indexs, all_data = prepare_q_a_data(
            question_number, cls_indexs, question_text, answers_text,
            word_dict, key_words_list, model)
        output = pop_answers(indexs, question_text, question_number, all_data)
    else:
        cls_indexs, question_text, answers_text, word_dict = prepare_data(data_path)
        print(f'question is:{input}')
        questions = input
        q_a_set = build_bilstm_qa(questions, question_text, answers_text)
        text_data_classified = preprocessor.get_sequence_tokens_with_turn(q_a_set, word_dict)
        indexs, answers = predict.test(conf, model, text_data_classified)
        answer_data = q_a_set[indexs]
        # The answer is the last tab-separated field of the matched record.
        this_answer = answer_data.split('\t')[-1]
        print(f'answer is: {this_answer}')
        # BUG FIX: the original never assigned ``output`` on this branch,
        # so the final ``return output`` raised UnboundLocalError.
        output = this_answer
    return output
Example #3
0
def build_candidate_answers(corpus, word_dict):
    """Collect every answer whose label marks it as a positive example.

    Tokenizes *corpus* with *word_dict* and returns the list of ``'r'``
    entries whose corresponding ``'y'`` label equals 1.
    """
    tokens = preprocessor.get_sequence_tokens_with_turn(
        corpus, word_dict)
    # Pair each label with its answer and keep only the positives.
    return [answer
            for label, answer in zip(tokens['y'], tokens['r'])
            if label == 1]
Example #4
0
def prepare_q_a_data(question_number, cls_indexs, question_text, answers_text,
                     word_dict, key_words_list, model):
    """Assemble candidate answers for each question and score them.

    For every question index in *question_number*, generates one positive
    answer plus a set of negative candidates, tokenizes the combined pool,
    and lets the model rank it.

    Returns
    -------
    tuple
        ``(indexs, all_data)`` — the model-selected indices and the full
        candidate-answer pool they index into.
    """
    all_data = []
    for index in question_number:
        question = question_text[index]
        positive_answer, negative_answers, negative_answers_index = \
            generate_data.generate_candidate_answers(
                question, key_words_list, cls_indexs, question_text, answers_text)
        # Record which question this candidate set belongs to.
        negative_answers_index.insert(0, index)
        # The positive answer leads the pool, followed by all negatives.
        all_data.append(positive_answer[0])
        # extend() replaces the original element-by-element append loop;
        # removed the commented-out debug prints that cluttered the body.
        all_data.extend(negative_answers)
        print(all_data)
    text_data_classified = preprocessor.get_sequence_tokens_with_turn(all_data, word_dict)
    indexs, answers = predict.test(conf, model, text_data_classified)
    print(indexs)
    return indexs, all_data
Example #5
0
                                   answers_text)
    all_data_file = data_path + "all_classified_data.txt"
    with open(all_data_file, 'w') as f:
        for item in all_data:
            line = item
            f.write("%s" % item)
        f.close()

    data_file = "../data/all_classified_data.txt"

    corpus, train, val, test = data_split(data_file, 0.7, 0.2)
    texts = preprocessor.get_texts(corpus)

    word_dict = preprocessor.generate_word_dict(texts)

    train_sequence_tokens = preprocessor.get_sequence_tokens_with_turn(
        train, word_dict)
    val_sequence_tokens = preprocessor.get_sequence_tokens_with_turn(
        val, word_dict)
    test_sequence_tokens = preprocessor.get_sequence_tokens_with_turn(
        test, word_dict)
    data_tokens = {'train': [], 'val': [], 'test': []}
    data_tokens['train'] = train_sequence_tokens
    data_tokens['val'] = val_sequence_tokens
    data_tokens['test'] = test_sequence_tokens

    preprocessor.dump_data_to_pkl(data_tokens,
                                  data_path + 'classified_data_split')
    preprocessor.dump_data_to_pkl(test_sequence_tokens,
                                  data_path + 'classified_test')
    preprocessor.dump_data_to_pkl(train_sequence_tokens,
                                  data_path + 'nclassified_train')