Beispiel #1
0
                      type=str,
                      default='data/wiki_index.pkl',
                      help='page title candiates')
    args.add_argument('--labels',
                      type=str,
                      default='data/map/ans_to_wiki',
                      help='write page assignment answers')
    args = args.parse_args()

    # Open up the database and build the answer map with the `normalize`
    # callable (defined outside this fragment).
    d = QuestionDatabase(args.database)
    page_diversity = d.answer_map(normalize)

    # Set up the active learner for writing assignments; the first element
    # of every human-labeled entry is collected and handed to
    # unmatched_answers() below.
    al = ActiveLearner(None, args.labels)
    existing_labels = {x[0] for x in al.human_labeled()}

    # Get the candidates we want to assign to pages
    answers = d.unmatched_answers(existing_labels)
    # Preview the first ten keys.  The keys are materialised into a list
    # first because a dictionary key view cannot be sliced on Python 3
    # (the previous `answers.keys()[:10]` raised TypeError there).
    print(list(answers.keys())[:10])

    # Open up the title finder
    # NOTE(review): this file handle is never explicitly closed; whether
    # TitleFinder consumes it eagerly is not visible from this fragment,
    # so it is deliberately left open -- confirm before wrapping in `with`.
    tf = TitleFinder(open(args.titles))

    # Walk the answers ordered by the sum of their count values, largest
    # first, skipping anything listed in kBAD_ANSWERS.
    for ans, count in sorted(answers.items(),
                             key=lambda x: sum(x[1].values()),
                             reverse=True):
        if ans in kBAD_ANSWERS:
            continue
        choices = list(tf.query(ans))
        print("--------- (%i)" % sum(count.values()))
    # Command-line options: question database, title index, and the file
    # page-assignment labels are written to.
    args = argparse.ArgumentParser('Interactive assign pages to questions')
    args.add_argument('--database', type=str, default='data/questions.db',
                      help='sqlite3 database of questions')
    args.add_argument('--titles', type=str, default='data/wiki_index.pkl',
                      help='page title candiates')
    args.add_argument('--labels', type=str, default='data/map/ans_to_wiki',
                      help='write page assignment answers')
    args = args.parse_args()

    # Open up the database and build the answer map with the `normalize`
    # callable (defined outside this fragment).
    d = QuestionDatabase(args.database)
    page_diversity = d.answer_map(normalize)

    # Set up the active learner for writing assignments; the first element
    # of every human-labeled entry is collected and handed to
    # unmatched_answers() below.
    al = ActiveLearner(None, args.labels)
    existing_labels = {x[0] for x in al.human_labeled()}

    # Get the candidates we want to assign to pages
    answers = d.unmatched_answers(existing_labels)
    # Preview the first ten keys.  The keys are materialised into a list
    # first because a dictionary key view cannot be sliced on Python 3
    # (the previous `answers.keys()[:10]` raised TypeError there).
    print(list(answers.keys())[:10])

    # Open up the title finder
    # NOTE(review): this file handle is never explicitly closed; whether
    # TitleFinder consumes it eagerly is not visible from this fragment,
    # so it is deliberately left open -- confirm before wrapping in `with`.
    tf = TitleFinder(open(args.titles))

    # Walk the answers ordered by the sum of their count values, largest
    # first, skipping anything listed in kBAD_ANSWERS.
    for ans, count in sorted(answers.items(), key=lambda x: sum(x[1].values()),
                             reverse=True):
        if ans in kBAD_ANSWERS:
            continue
        choices = list(tf.query(ans))
        print("--------- (%i)" % sum(count.values()))
        print(ans)
Beispiel #3
0
import pickle
import time

from page_assignment.active_learning_for_matching import ActiveLearner
from util.qdb import QuestionDatabase


if __name__ == "__main__":
    import argparse

    # Command-line interface: the question database path and the location
    # the previously learned matches are read from.
    parser = argparse.ArgumentParser(description="apply wikipedia pages")
    parser.add_argument(
        "--db",
        type=str,
        default='data/questions.db',
        help="The question database",
    )
    parser.add_argument(
        "--match_location",
        type=str,
        default='data/map/ans_to_wiki_',
        help="Where we read matches learned",
    )
    flags = parser.parse_args()

    # Timestamp taken before any loading starts.
    start = time.time()

    print("Loading db..")
    db = QuestionDatabase(flags.db)

    print("Loading classifier...")
    classifier = ActiveLearner(None, flags.match_location, [])

    # Hand every human-labeled (question, page) pair to
    # db.set_answer_page with an empty answer type, echoing each one.
    for labeled in classifier.human_labeled():
        question, page = labeled
        ans_type = ""
        db.set_answer_page(question, page, ans_type)
        print(question, page, "GIVEN", ans_type)