Beispiel #1
0
def main():
    """Rank answer candidates for factoid and list questions and save them.

    Reads the questions from the hard-coded input file, scores the candidates
    of every factoid-type and list-type question with an ``SVMRank`` instance
    built from the name 'weights_2' (presumably a saved model -- confirm
    against ``SVMRank``), stores the top-ranked candidates on each question as
    ``exact_answer`` and finally writes everything out through
    ``data.save_factoid_list_answers``.
    """
    # Only this path was ever effective. The previous code first assigned
    # 'input/BioASQ-task6bPhaseB-testset3.json',
    # 'input/BioASQ-trainingDataset6b.json' and
    # 'input/BioASQ-trainingDataset5b.json' to the same variable and
    # overwrote each of them before it was used.
    file_name = 'input/phaseB_5b_05.json'
    save_model_file_name = 'weights_2'
    ranker = SVMRank(save_model_file_name)
    data = DataLoader(file_name)
    data.load_ner_entities()
    ans_file = 'output/factoid_list_%s.json' % data.name

    # (question type, number of top-ranked answers kept per question)
    answer_limits = (
        (C.FACTOID_TYPE, 5),
        (C.LIST_TYPE, 10),
    )
    for question_type, max_answers in answer_limits:
        questions = data.get_questions_of_type(question_type)
        # The index handed to the ranker restarts at 0 for every question
        # type, exactly as the two separate loops did before.
        for i, question in enumerate(tqdm(questions)):
            ranked_sentences = question.ranked_sentences()
            X, candidates = get_only_features(question, ranked_sentences)
            top_answers = ranker.classify_from_feed(X, candidates, i)
            # Every kept answer is wrapped in its own single-element list.
            question.exact_answer = [
                [answer] for answer in top_answers[:max_answers]
            ]

    data.save_factoid_list_answers(ans_file)
Beispiel #2
0
def main():
    """Train the SVM ranker on factoid questions and store it as 'weights_2'.

    Loads 'input/BioASQ-trainingDataset6b.json', takes the first 419
    factoid-type questions, feeds the features and labels of each one to the
    ranker together with the question's position, then trains on everything
    that was fed and saves the result.
    """
    svm_ranker = SVMRank()
    training_path = 'input/BioASQ-trainingDataset6b.json'
    loader = DataLoader(training_path)
    loader.load_ner_entities()

    # Only the first 419 factoid questions take part in training.
    factoid_questions = loader.get_questions_of_type(C.FACTOID_TYPE)
    training_questions = factoid_questions[:419]

    for position, factoid in enumerate(training_questions):
        features, labels = get_features(factoid, factoid.ranked_sentences())
        svm_ranker.feed(features, labels, position)

    svm_ranker.train_from_feed()
    svm_ranker.save('weights_2')