Example #1
0
def instantiate_feature(feature_name, questions, deep_data="data/deep"):
    """
    Build the feature extractor registered under the given name.

    A "Loading feature ..." line is printed before construction and "done"
    afterwards, whether or not the name was recognized.

    @param feature_name: The feature to instantiate; one of "ir", "text",
        "lm", "deep", "wikilinks", "answer_present", "label", "classifier"
        or "mentions"
    @param questions: question database; passed to the extractors that take
        it and, for "deep", queried through get_all_pages()
    @param deep_data: base path under which the "deep" feature looks for its
        classifier, params and vocab entries
    @return: the constructed feature object, or None when feature_name is
        not recognized (a message is printed in that case)
    """
    extractor = None
    print("Loading feature %s ..." % feature_name)

    if feature_name == "ir":
        extractor = IrExtractor()

        # One row per index, registered in this order:
        # (name template, path prefix, mean, var).
        index_specs = (
            ("wiki_%i", "data/ir/whoosh_wiki", 0.0, 1.0),
            ("qb_%i", "data/ir/whoosh_qb", 0.0, 1.0),
            ("source_%i", "data/ir/whoosh_source", 0.0, 1.0),
        )
        for name_template, path_prefix, mean, var in index_specs:
            extractor.add_index(
                name_template % kMIN_APPEARANCES,
                "%s_%i" % (path_prefix, kMIN_APPEARANCES),
                mean, var)

    elif feature_name == "text":
        extractor = TextExtractor()

    elif feature_name == "lm":
        extractor = LanguageModel("data/language_model")
        for corpus in ("qb", "wiki", "source"):
            extractor.add_corpus(corpus)

    elif feature_name == "deep":
        print("from %s" % deep_data)

        # Key each page by its lower-cased, underscore-joined form and keep
        # the original page string as the value.
        pages_by_key = {
            title.lower().replace(' ', '_'): title
            for title in questions.get_all_pages()
        }

        classifier_path = "%s/classifier" % deep_data
        params_path = "%s/params" % deep_data
        vocab_path = "%s/vocab" % deep_data
        extractor = DeepExtractor(classifier_path, params_path, vocab_path,
                                  "data/common/ners", pages_by_key, 200)

    elif feature_name == "wikilinks":
        extractor = WikiLinks()

    elif feature_name == "answer_present":
        extractor = AnswerPresent()

    elif feature_name == "label":
        extractor = Labeler(questions)

    elif feature_name == "classifier":
        extractor = Classifier('data/classifier/bigrams.pkl', questions)

    elif feature_name == "mentions":
        extractor = Mentions(questions, kMIN_APPEARANCES)

    else:
        # Unknown names are reported but not treated as an error; the caller
        # receives None.
        print("Don't know what to do with %s" % feature_name)

    print("done")
    return extractor
Example #2
0
def instantiate_feature(feature_name, questions):
    """
    Create the feature object that corresponds to a feature name.

    Prints a "Loading feature ..." line up front and "done" at the end,
    including when the name is unknown.

    @param feature_name: The feature to instantiate; one of "ir", "text",
        "lm", "deep", "wikilinks", "answer_present", "label" or "classifier"
    @param questions: question database; passed to the extractors that take
        it and, for "deep", queried through get_all_pages()
    @return: the constructed feature object, or None when feature_name is
        not recognized (a message is printed in that case)
    """
    built = None
    print("Loading feature %s ..." % feature_name)

    if feature_name == "ir":
        built = IrExtractor()

        # Indexes are added in the listed order:
        # (name template, path prefix, mean, var).
        for label_fmt, location, mean, var in (
                ("wiki_%i", "data/ir/whoosh_wiki", 0.0, 1.0),
                ("qb_%i", "data/ir/whoosh_qb", 0.0, 1.0),
                ("source_%i", "data/ir/whoosh_source", 0.0, 1.0)):
            index_name = label_fmt % kMIN_APPEARANCES
            index_path = "%s_%i" % (location, kMIN_APPEARANCES)
            built.add_index(index_name, index_path, mean, var)

    elif feature_name == "text":
        built = TextExtractor()

    elif feature_name == "lm":
        built = LanguageModel("data/lm.txt")
        for corpus_name in ("qb", "wiki", "source"):
            built.add_corpus(corpus_name)

    elif feature_name == "deep":
        # Map the lower-cased, underscore-joined form of each page back to
        # the original page string.
        lookup = {
            entry.lower().replace(' ', '_'): entry
            for entry in questions.get_all_pages()
        }
        built = DeepExtractor("data/deep/classifier", "data/deep/params",
                              "data/deep/vocab", "data/common/ners",
                              lookup, 200)

    elif feature_name == "wikilinks":
        built = WikiLinks()

    elif feature_name == "answer_present":
        built = AnswerPresent()

    elif feature_name == "label":
        built = Labeler(questions)

    elif feature_name == "classifier":
        built = Classifier('data/classifier/bigrams.pkl', questions)

    else:
        # An unrecognized name is only reported; None is returned below.
        print("Don't know what to do with %s" % feature_name)

    print("done")
    return built