コード例 #1
0
# Answer strings that the assignment loop below tests with
# ``ans in kBAD_ANSWERS``.
# NOTE(review): presumably these are skipped as too short or ambiguous --
# the handling is not visible in this part of the file; confirm.
kBAD_ANSWERS = ["", "red river", "the", "figaro", "normal", "s", "p"]

if __name__ == "__main__":
    # The text is passed as ``description``: the first positional parameter
    # of ArgumentParser is ``prog``, so passing it positionally would print
    # this sentence in the usage line where the program name belongs.
    parser = argparse.ArgumentParser(
        description='Interactive assign pages to questions')
    parser.add_argument('--database', type=str, default='data/questions.db',
                        help='sqlite3 database of questions')
    parser.add_argument('--titles', type=str, default='data/wiki_index.pkl',
                        help='page title candiates')
    parser.add_argument('--labels', type=str, default='data/map/ans_to_wiki',
                        help='write page assignment answers')
    # ``args`` is only ever the parsed namespace; the parser keeps its own
    # name instead of being rebound.
    args = parser.parse_args()

    # Open up the database
    d = QuestionDatabase(args.database)
    page_diversity = d.answer_map(normalize)

    # Set up the active learner for writing assignments; the first field of
    # every human-labeled entry is collected so it can be excluded below.
    al = ActiveLearner(None, args.labels)
    existing_labels = set(x[0] for x in al.human_labeled())

    # get the candidates we want to assign to pages
    answers = d.unmatched_answers(existing_labels)
    # Preview the first ten keys.  The keys are materialised into a list
    # first because dict views cannot be sliced on Python 3; on Python 2
    # this yields the same list as before.
    print(list(answers.keys())[:10])

    # Open up the title finder
    # NOTE(review): this file handle is never closed here, and the default
    # path is a .pkl opened in text mode.  Whether TitleFinder reads it
    # eagerly or keeps the handle is not visible from this file, so the call
    # is left unchanged -- confirm before wrapping it in ``with``.
    tf = TitleFinder(open(args.titles))

    for ans, count in sorted(answers.items(), key=lambda x: sum(x[1].values()),
                             reverse=True):
        if ans in kBAD_ANSWERS:
コード例 #2
0
ファイル: naqt.py プロジェクト: cequencer/qb
            title, rest = rest.split('"', 1)
            self.topics[int(id)] = title


if __name__ == "__main__":
    # Script entry point: reads questions from --naqt_path (when given) and
    # matches each answer against the answer map of the database at --db.
    import argparse

    parser = argparse.ArgumentParser(description='Import questions')
    parser.add_argument('--naqt_path', type=str)
    parser.add_argument('--db', type=str, default='data/questions.db')

    flags = parser.parse_args()

    qdb = QuestionDatabase(flags.db)
    # The database wrapper's underlying connection is used directly for the
    # raw query below.
    conn = qdb._conn
    answer_map = qdb.answer_map()

    # Find existing naqt questions
    c = conn.cursor()
    command = 'SELECT naqt FROM questions WHERE naqt >= 0;'
    c.execute(command)
    # Iterating the cursor yields one row per question; column 0 is the
    # naqt id selected above.
    existing = set(int(x[0]) for x in c)

    num_skipped = 0
    last_id = kNAQT_START
    if flags.naqt_path:
        for qq in naqt_reader(flags.naqt_path):
            # Only take a page when the answer maps to exactly one
            # candidate; otherwise leave the page empty.
            if qq.answer in answer_map and len(answer_map[qq.answer]) == 1:
                # The single key is fetched by iteration: ``.keys()[0]``
                # raises TypeError on Python 3, where dict views are not
                # indexable.  On Python 2 the result is identical.
                page = next(iter(answer_map[qq.answer]))
            else:
                page = ""
コード例 #3
0
            title, rest = rest.split('"', 1)
            self.topics[int(id)] = title


if __name__ == "__main__":
    # Script entry point: reads questions from --naqt_path (when given) and
    # matches each answer against the answer map of the database at --db.
    import argparse

    parser = argparse.ArgumentParser(description='Import questions')
    parser.add_argument('--naqt_path', type=str)
    parser.add_argument('--db', type=str, default='data/questions.db')

    flags = parser.parse_args()

    qdb = QuestionDatabase(flags.db)
    # The database wrapper's underlying connection is used directly for the
    # raw query below.
    conn = qdb._conn
    answer_map = qdb.answer_map()

    # Find existing naqt questions
    c = conn.cursor()
    command = 'SELECT naqt FROM questions WHERE naqt >= 0;'
    c.execute(command)
    # Iterating the cursor yields one row per question; column 0 is the
    # naqt id selected above.
    existing = set(int(x[0]) for x in c)

    num_skipped = 0
    last_id = kNAQT_START
    if flags.naqt_path:
        for qq in naqt_reader(flags.naqt_path):
            # Only take a page when the answer maps to exactly one
            # candidate; otherwise leave the page empty.
            if qq.answer in answer_map and len(answer_map[qq.answer]) == 1:
                # The single key is fetched by iteration: ``.keys()[0]``
                # raises TypeError on Python 3, where dict views are not
                # indexable.  On Python 2 the result is identical.
                page = next(iter(answer_map[qq.answer]))
            else:
                page = ""
コード例 #4
0
                      type=str,
                      default='data/questions.db',
                      help='sqlite3 database of questions')
    args.add_argument('--titles',
                      type=str,
                      default='data/wiki_index.pkl',
                      help='page title candiates')
    args.add_argument('--labels',
                      type=str,
                      default='data/map/ans_to_wiki',
                      help='write page assignment answers')
    # From here on ``args`` is the parsed namespace, not the parser.
    args = args.parse_args()

    # Open up the database
    d = QuestionDatabase(args.database)
    page_diversity = d.answer_map(normalize)

    # Set up the active learner for writing assignments; the first field of
    # every human-labeled entry is collected so it can be excluded below.
    al = ActiveLearner(None, args.labels)
    existing_labels = set(x[0] for x in al.human_labeled())

    # get the candidates we want to assign to pages
    answers = d.unmatched_answers(existing_labels)
    # Preview the first ten keys.  The keys are materialised into a list
    # first because dict views cannot be sliced on Python 3; on Python 2
    # this yields the same list as before.
    print(list(answers.keys())[:10])

    # Open up the title finder
    # NOTE(review): this file handle is never closed here, and the default
    # path is a .pkl opened in text mode.  Whether TitleFinder reads it
    # eagerly or keeps the handle is not visible from this file, so the call
    # is left unchanged -- confirm before wrapping it in ``with``.
    tf = TitleFinder(open(args.titles))

    for ans, count in sorted(answers.items(),
                             key=lambda x: sum(x[1].values()),
                             reverse=True):