Ejemplo n.º 1
0
def _main(conf):
    """Train the predicate ranking model configured by ``conf`` and save it.

    The experiment name is built from ``conf.task_name``, ``conf.dataset``
    and ``get_timestamp()``; all artefacts go to ``../results/<name>``.
    Training and validation questions are each randomly subsampled to
    ``conf.max_questions`` when there are more than that.
    """
    conf.experiment_name = (
        f"{conf.task_name}_{conf.dataset}_{get_timestamp()}")
    conf.model_dirpath = os.path.join("../results", conf.experiment_name)

    try:
        print("Create model directory:", conf.model_dirpath)
        os.makedirs(conf.model_dirpath)
    except OSError as err:
        # Best effort: report the failure and keep going.
        print(err)

    def _cap(question_set):
        # Randomly subsample only when the set exceeds the configured cap.
        if conf.max_questions < len(question_set):
            return random.sample(question_set, conf.max_questions)
        return question_set

    # Both files are read first, then capped (training before validation).
    train_questions = load_questions(conf.training_file_path)
    valid_questions = load_questions(conf.validation_file_path)
    train_questions = _cap(train_questions)
    valid_questions = _cap(valid_questions)

    print(f"#Train file instances: {len(train_questions)}")
    print(f"#Valid file instances: {len(valid_questions)}")

    res = load_resources(conf)

    print("Word Vocabulary", res.word_embeddings.vocabulary)
    print("Char Vocabulary", res.char_vocabulary)
    if conf.use_graph_embeddings == 1:
        print("Graph Vocabulary", res.graph_embeddings.vocabulary)
    print("Entity Vocabulary", res.entity_vocabulary)
    print("Relation Vocabulary", res.relation_vocabulary)

    print("Configuration:")
    print(conf.description())

    print("build model ...")
    ranking_model = models.simple_joint_qa_predicate_model(conf, res)
    ranking_model.summary()

    print("Save configuration ...")
    config_path = os.path.join(conf.model_dirpath, "configuration.json")
    conf.save(config_path)

    train(train_questions, valid_questions, ranking_model, conf, res)

    print("Save weights ...")
    weights_path = os.path.join(conf.model_dirpath,
                                "answer_ranking_model.hdf5")
    ranking_model.save_weights(weights_path, overwrite=True)
    print("Done!")
Ejemplo n.º 2
0
def stage2(players):
    """Run the second stage until at most three players remain.

    The first question goes to ``players[0]``; every later question goes to
    whoever ``select_player(players, select=True)`` returns. Questions are
    taken in order from ``load_questions(25, stage=2)``; once that pool is
    exhausted, ``ask_question`` is called without a question.

    Args:
        players: Collection of the players still in the game. The loop only
            ends once ``len(players)`` is 3 or less, so it relies on
            ``check_current_player_chances`` shrinking it (presumably by
            removing eliminated players -- defined elsewhere, verify).

    Returns:
        The same ``players`` object that was passed in.
    """
    print("Lets begin the second stage!")
    questions2 = load_questions(25, stage=2)
    question_counter = 0
    first_question = True
    while len(players) > 3:
        if first_question:
            # Looked up inside the loop so an empty ``players`` list simply
            # skips the stage instead of raising IndexError.
            current_player = players[0]
            first_question = False
        else:
            current_player = select_player(players, select=True)

        # Only the pool lookup is guarded: an IndexError raised inside
        # ask_question must not be mistaken for an exhausted pool (which
        # would also ask the player a second time).
        try:
            question = questions2[question_counter]
        except IndexError:
            print("The pool of questions has ended")
            correct = ask_question(current_player)
        else:
            correct = ask_question(current_player, question)

        check_current_player_chances(players,
                                     current_player,
                                     correct,
                                     stage="stage2")
        question_counter += 1

    print("The players going to the FINAL STAGE are:")
    for player in players:
        print(player)
    return players
def main(questions_filepath, output_filepath):
    """Write one labelled line per question to ``output_filepath``.

    Each line is ``LABEL_PREFIX + predicate + " " + text`` followed by a
    newline, built from the ``"predicate"`` and ``"text"`` entries of every
    question returned by ``load_questions(questions_filepath)``. The output
    file is opened in ``"w"`` mode, so existing content is replaced.
    """
    questions = load_questions(questions_filepath)

    def _labelled_lines():
        # Lazily yield the output lines in question order.
        for entry in questions:
            text, predicate = entry["text"], entry["predicate"]
            yield LABEL_PREFIX + predicate + " " + text + "\n"

    with open(output_filepath, "w") as fout:
        fout.writelines(_labelled_lines())
Ejemplo n.º 4
0
def print_questions(docs_json):
    """Print up to the first 200 questions of ``test.tsv`` for inspection.

    For every question this prints a blank separator, the running index, the
    raw question, the ``docs_json`` entry for the question's first listed
    passage, and then the question and that entry after
    ``process_and_tokenize_string``.

    Args:
        docs_json: Nested mapping indexed as
            ``docs_json[str(document_id)][str(passage_id)]``.
    """
    qs = load_questions('test.tsv')
    # Stop at the end of the question list rather than raising IndexError
    # when the file holds fewer than 200 questions.
    shown = min(200, len(qs.questions))
    for i in range(shown):
        entry = qs.questions[i]
        print(" ")
        print(i)
        print(entry.question)
        # Look the passage up once and reuse it for both printouts.
        passage = docs_json[str(entry.document_id)][str(entry.passages[0])]
        print(passage)
        print(process_and_tokenize_string(entry.question))
        print(process_and_tokenize_string(passage))
Ejemplo n.º 5
0
def main(questions_filepath, experiment_dir):
    """Run prediction with a stored ``simple_joint_qa`` model.

    Loads ``configuration.json`` from ``experiment_dir``, reads the questions
    (randomly subsampled to ``conf.max_questions`` when there are more),
    restores ``best_answer_ranking_model.hdf5`` from ``conf.model_dirpath``
    and writes ``predicted_answers.txt`` and ``prediction_summary.txt`` there.

    When ``conf.use_ner`` is truthy, ``questions_filepath`` is replaced by
    ``../res/test_after_ner.txt`` and ``conf.test_top_candidates`` is set
    to 7.
    """
    conf = Configuration.load(experiment_dir + "/configuration.json")

    if conf.use_ner:
        print("Using NER questions!")
        questions_filepath = "../res/test_after_ner.txt"
        # Keep the top 7 candidate entities.
        conf.test_top_candidates = 7

    print("Configuration:")
    print(conf.description())

    questions = load_questions(questions_filepath)
    if conf.max_questions < len(questions):
        questions = random.sample(questions, conf.max_questions)
    print("#Questions:", len(questions))

    res = load_resources(conf)

    print("Word Vocabulary", res.word_embeddings.vocabulary)
    print("Char Vocabulary", res.char_vocabulary)
    print("Entity Vocabulary", res.entity_vocabulary)
    print("Relation Vocabulary", res.relation_vocabulary)

    print("build model ...")
    ranking_model = models.simple_joint_qa(conf, res)
    weights_path = os.path.join(conf.model_dirpath,
                                "best_answer_ranking_model.hdf5")
    ranking_model.load_weights(weights_path)
    ranking_model.summary()

    print('Starting predicting ...\n')
    answers_path = os.path.join(conf.model_dirpath, "predicted_answers.txt")
    predicted_data_batches = predict(questions, ranking_model, conf, res)
    print('Writing predictions to the model path')
    write_prediction(predicted_data_batches, answers_path, conf)

    print('Writing prediction summary to the model path')
    summary_path = os.path.join(conf.model_dirpath, "prediction_summary.txt")
    cutoffs = [1, 3, 5, 10, 20, 50]
    recall_values = recall_at_n(predicted_data_batches, cutoffs, conf)
    write_prediction_summary(recall_values, summary_path, conf)
Ejemplo n.º 6
0
def final_stage(players):
    """Run the final stage for up to 40 questions.

    The loop alternates between two modes, switched by
    ``thirty_points_reached``:

    * "first come first served" (flag is False): the question is asked
      without naming a player, then ``select_player`` picks who answers.
      A correct answer awards points and the flag is refreshed from
      ``check_30_points(players)``.
    * "own question" mode (flag is True): a player is selected first and
      then asked directly. If the selected player's ``post`` equals that of
      the previously answering player, the question counts as an own
      question when points are added.

    After every question the scores are shown, and the program exits via
    ``exit()`` as soon as ``check_winner(players)`` is truthy. If all
    questions are used up, ``check_winner(players, end=True)`` is called.

    Args:
        players: Players taking part in the final; passed through to the
            helper functions, which are defined elsewhere.
    """
    print("Lets begin the final stage!")
    prepare_players_for_final(players)
    own_question = False
    thirty_points_reached = False
    max_num_of_questions = 40
    # NOTE(review): the final stage loads questions with stage=2 -- confirm
    # this is intended and not a leftover from stage2().
    questions3 = load_questions(max_num_of_questions, stage=2)
    for n in range(max_num_of_questions):
        # NOTE(review): presumably raises IndexError if fewer than
        # max_num_of_questions questions were loaded -- verify.
        question = questions3[n]

        # "First come first served" mode: ask first, then pick the player.
        if not thirty_points_reached:
            correct = ask_question(question=question)
            current_player = select_player(players, select=True)
            check_current_player_chances(players,
                                         current_player,
                                         correct,
                                         stage="final_stage")
            if correct:
                # own_question is always False in this branch.
                add_points(current_player, own_question)
                thirty_points_reached = check_30_points(players)
        else:

            # Remember who answered last so an own question can be detected.
            # current_player is always bound here: this branch is only
            # reachable after the branch above has run at least once.
            previous_player = current_player
            current_player = select_player(players, select=True)

            # Own question: same ``post`` as the previous player. The
            # ``thirty_points_reached`` test is always true in this branch.
            if current_player.post == previous_player.post and thirty_points_reached:
                own_question = True
            correct = ask_question(current_player, question=question)
            check_current_player_chances(players,
                                         current_player,
                                         correct,
                                         stage="final_stage")
            if correct:
                add_points(current_player, own_question)
            own_question = False
            # After a wrong answer from the same player, fall back to
            # "first come first served" mode. Note this compares the player
            # objects themselves, whereas the check above compares ``post``.
            if current_player == previous_player and not correct:
                thirty_points_reached = False

        present_scores(players)
        if check_winner(players):
            # Terminates the whole program, not just this function.
            exit()
    # All questions used up: let check_winner decide with end=True.
    check_winner(players, end=True)
Ejemplo n.º 7
0
def export_to_file(docs_json, document_indexer, passage_indexer):
    """Answer every question in ``test.tsv`` and dump the results as JSON.

    For each question the first two results of the document query are
    re-indexed with ``passage_indexer``, and up to five passage results are
    recorded as ``{'answer': '<doc_id>:<passage_id>', 'score': '<score>'}``.
    The collected ``{'id': ..., 'answers': [...]}`` records are written to
    ``data\\answers.json``.

    Does nothing (apart from a message) when the module-level flag
    ``reindex_documents`` is truthy.

    Args:
        docs_json: Mapping indexed by ``str(document_id)``.
        document_indexer: Object providing ``index`` and ``execute_query``
            for documents.
        passage_indexer: Object providing ``index`` and ``execute_query``
            for passages.
    """
    if reindex_documents:
        print('reindexing_documents is on.. refusing to export to file')
        return

    data = []
    qs = load_questions('test.tsv')
    document_indexer.index(docs_json, reindex_documents)

    for i, query in enumerate(qs.questions):
        qid = query.qid
        try:
            print('Processing question: ' + str(qid) + " (" + str(i) + ")")
            top_docs = document_indexer.execute_query(query.question)
            sliced_docs = {}
            for top_doc in top_docs[0:2]:
                doc_id = top_doc.doc.get_id()
                sliced_docs[doc_id] = docs_json[str(doc_id)]
            passage_indexer.index(sliced_docs, False)
            top_passages = passage_indexer.execute_query(query.question)
            # Slice instead of indexing 0..4 so a question with fewer than
            # five passages keeps the answers it does have instead of being
            # dropped from the output by an IndexError.
            answers = [
                {
                    'answer':
                    str(hit.passage.get_doc_id()) + ':' +
                    str(hit.passage.get_id()),
                    'score':
                    str(hit.get_score())
                }
                for hit in top_passages[:5]
            ]
        except Exception as e:
            # Best effort per question: keep exporting the rest, but say
            # which question failed and why.
            print('error processing question ' + str(qid) + ': ' + repr(e))
        else:
            data.append({'id': qid, 'answers': answers})

    # NOTE(review): the backslash makes this a Windows-style path; on other
    # platforms it names a file literally called "data\answers.json".
    with open('data\\answers.json', 'w', encoding='utf-8') as outfile:
        json.dump(data, outfile, ensure_ascii=False, indent=2)
def main(questions_filepath, conf):
    """Run prediction with a stored predicate model and write recall results.

    Reads the questions (randomly subsampled to ``conf.max_questions`` when
    there are more), restores ``best_answer_ranking_model.hdf5`` from
    ``conf.model_dirpath`` into ``simple_joint_qa_predicate_model`` and hands
    the predictions to ``compute_recall_and_write_predictions``.
    """
    print("Configuration:")
    print(conf.description())

    questions = load_questions(questions_filepath)
    if conf.max_questions < len(questions):
        questions = random.sample(questions, conf.max_questions)
    print("#Questions:", len(questions))

    res = load_resources(conf)

    print("Word Vocabulary", res.word_embeddings.vocabulary)
    print("Char Vocabulary", res.char_vocabulary)
    print("Entity Vocabulary", res.entity_vocabulary)
    print("Relation Vocabulary", res.relation_vocabulary)

    print("build model ...")
    ranking_model = models.simple_joint_qa_predicate_model(conf, res)
    weights_path = os.path.join(conf.model_dirpath,
                                "best_answer_ranking_model.hdf5")
    ranking_model.load_weights(weights_path)
    ranking_model.summary()

    print('Starting predicting ...\n')
    # Both output locations live next to the model weights.
    output_filepath = os.path.join(conf.model_dirpath,
                                   "predicate_predicted_answers.txt")
    recall_output_filepath = os.path.join(
        conf.model_dirpath, "predicate_prediction_summary.txt")
    predicted_data = predict(questions, ranking_model, conf, res)

    recall_ranges = [1, 3, 5, 10, 20, 50]

    print('Computing recall values and writing predictions to the model path')
    compute_recall_and_write_predictions(
        predicted_data,
        output_filepath,
        recall_output_filepath,
        recall_ranges,
        conf,
        res,
    )
Ejemplo n.º 9
0
def score_documents_retrieval(docs_json, document_indexer, passage_indexer,
                              start=70, question_count=20):
    """Print expected vs. retrieved passages for a window of questions.

    For each question in the window, the first three results of the document
    query are re-indexed with ``passage_indexer`` and the first ten passage
    results are printed next to the question's expected document and
    passages.

    Args:
        docs_json: Mapping indexed by ``str(document_id)``.
        document_indexer: Object providing ``index`` and ``execute_query``
            for documents.
        passage_indexer: Object providing ``index`` and ``execute_query``
            for passages.
        start: Index of the first question of ``test.tsv`` to evaluate.
            Defaults to 70, the value that used to be hard-coded.
        question_count: Number of questions to evaluate. Defaults to 20,
            the value that used to be hard-coded.
    """
    qs = load_questions('test.tsv')
    document_indexer.index(None, False)

    # Clamp the window so a short question file ends the loop instead of
    # raising IndexError.
    stop = min(start + question_count, len(qs.questions))
    for i in range(start, stop):
        query = qs.questions[i]
        top_docs = document_indexer.execute_query(query.question)
        sliced_docs = {}
        for top_doc in top_docs[0:3]:
            doc_id = top_doc.doc.get_id()
            sliced_docs[doc_id] = docs_json[str(doc_id)]
        passage_indexer.index(sliced_docs, True)
        top_passages = passage_indexer.execute_query(query.question)

        print('\nExpected: document: ' + str(query.document_id) +
              " Passages: " + str(query.passages))
        print('Results:\n' + str(top_passages[0:10]))
Ejemplo n.º 10
0
def stage1(players):
    """Run the first stage: ``(len(players) + 1) * 2`` question rounds.

    In round ``n`` the answering player comes from
    ``select_player(players, num_of_players, n)``; rounds for which that
    returns a falsy value are skipped. Questions are taken by round index
    from ``load_questions(len(players), stage=1)``; when no question exists
    for a round, ``ask_question`` is called without one.

    Args:
        players: Collection of players taking part; passed through to the
            helper functions, which are defined elsewhere.

    Returns:
        The same ``players`` object that was passed in.
    """
    print("Let's begin the first stage")
    num_of_players = len(players)
    questions1 = load_questions(num_of_players, stage=1)
    for n in range((num_of_players + 1) * 2):
        current_player = select_player(players, num_of_players, n)
        if not current_player:
            continue

        # Only the pool lookup is guarded: an IndexError raised inside
        # ask_question must not be mistaken for an exhausted pool (which
        # would also ask the player a second time).
        try:
            question = questions1[n]
        except IndexError:
            print("The pool of questions has ended")
            correct = ask_question(current_player)
        else:
            correct = ask_question(current_player, question)

        check_current_player_chances(players,
                                     current_player,
                                     correct,
                                     stage="stage1")
        check_winner(players)

    print("The players going to the SECOND STAGE are:")
    for player in players:
        print(player)
    return players