def _assign_exact_answers(ranker, questions, max_answers):
    """Rank answer candidates for each question and keep the best ones.

    For every question, features are extracted from its ranked sentences,
    the ranker orders the candidates, and ``question.exact_answer`` is set
    to a list of single-element lists holding at most ``max_answers``
    answers, in the order returned by the ranker.

    Args:
        ranker: object exposing ``classify_from_feed(X, candidates, i)``.
        questions: sequence of question objects to annotate in place.
        max_answers: maximum number of answers stored per question.
    """
    for i, question in enumerate(tqdm(questions)):
        ranked_sentences = question.ranked_sentences()
        X, candidates = get_only_features(question, ranked_sentences)
        # ``i`` is the question's position within this batch (it restarts
        # at 0 for every call); it is handed to the ranker unchanged.
        top_answers = ranker.classify_from_feed(X, candidates, i)
        question.exact_answer = [
            [answer] for answer in top_answers[:max_answers]
        ]


def main(file_name='input/phaseB_5b_05.json',
         save_model_file_name='weights_2'):
    """Answer the factoid and list questions of a dataset and save them.

    Factoid questions receive up to 5 ranked answers and list questions up
    to 10. The results are written through
    ``data.save_factoid_list_answers`` to
    ``output/factoid_list_<data.name>.json``.

    Args:
        file_name: path of the input question file. The default is the
            only value that was effective before; the other datasets that
            used to be assigned here were immediately overwritten.
        save_model_file_name: name passed to ``SVMRank`` (presumably the
            saved model weights to use -- confirm against ``SVMRank``).
    """
    ranker = SVMRank(save_model_file_name)

    data = DataLoader(file_name)
    data.load_ner_entities()
    ans_file = 'output/factoid_list_%s.json' % data.name

    # Same ranking pipeline for both question types; only the number of
    # answers kept differs.
    _assign_exact_answers(
        ranker, data.get_questions_of_type(C.FACTOID_TYPE), 5)
    _assign_exact_answers(
        ranker, data.get_questions_of_type(C.LIST_TYPE), 10)

    data.save_factoid_list_answers(ans_file)
def main():
    """Train the SVM ranker on factoid questions and save the result.

    Loads the BioASQ 6b training dataset, takes the first 419 factoid
    questions, feeds the ``get_features`` output of each one to the
    ranker, trains on everything that was fed, and saves the model under
    the name ``weights_2``.
    """
    model = SVMRank()

    loader = DataLoader('input/BioASQ-trainingDataset6b.json')
    loader.load_ner_entities()

    factoid_questions = loader.get_questions_of_type(C.FACTOID_TYPE)
    # Only the first 419 factoid questions are used for training.
    training_questions = factoid_questions[:419]

    for index, item in enumerate(training_questions):
        sentences = item.ranked_sentences()
        features, targets = get_features(item, sentences)
        # The running index is passed along with each question's data.
        model.feed(features, targets, index)

    # Training happens once, after every question has been fed.
    model.train_from_feed()
    model.save('weights_2')