Example #1
0
def instantiate_feature(feature_name, questions):
    """Create and return the feature extractor named *feature_name*.

    @param feature_name: The feature to instantiate ("ir", "text", "lm",
        "deep", "wikilinks", "answer_present", "label", or "classifier")
    @param questions: question database (used by the "deep", "label", and
        "classifier" features)
    @return: the extractor instance, or None for an unknown feature name
    """

    feature = None
    print("Loading feature %s ..." % feature_name)
    if feature_name == "ir":
        feature = IrExtractor()

        # Identity normalization parameters (mean 0, variance 1) for the
        # IR scores of each index.
        wiki_mean = 0.0
        wiki_var = 1.0
        qb_mean = 0.0
        qb_var = 1.0

        feature.add_index("wiki_%i" % kMIN_APPEARANCES, "%s_%i" %
                          ("data/ir/whoosh_wiki", kMIN_APPEARANCES),
                          wiki_mean, wiki_var)
        feature.add_index("qb_%i" % kMIN_APPEARANCES, "%s_%i" %
                          ("data/ir/whoosh_qb", kMIN_APPEARANCES),
                          qb_mean, qb_var)

    elif feature_name == "text":
        feature = TextExtractor()
    elif feature_name == "lm":
        # Open in binary mode (pickle requires bytes) and close the handle
        # deterministically; the original leaked an open text-mode file.
        with open("data/lm.pkl", "rb") as lm_file:
            feature = pickle.load(lm_file)
    elif feature_name == "deep":
        # Map normalized page titles (lowercase, underscores for spaces)
        # back to the canonical page name.
        page_dict = {}
        for page in questions.get_all_pages():
            page_dict[page.lower().replace(' ', '_')] = page
        feature = DeepExtractor("data/deep/classifier",
                                "data/deep/params", "data/deep/vocab",
                                "data/common/ners", page_dict, 200)
    elif feature_name == "wikilinks":
        feature = WikiLinks()
    elif feature_name == "answer_present":
        feature = AnswerPresent()
    elif feature_name == "label":
        feature = Labeler(questions)
    elif feature_name == "classifier":
        feature = Classifier('data/classifier/bigrams.pkl', questions)
    else:
        print("Don't know what to do with %s" % feature_name)
    print("done")
    return feature
Example #2
0
from extractors.lm import *
from extractors.deep import *
from extractors.classifier import *
from extractors.wikilinks import *
from extractors.answer_present import AnswerPresent

# Minimum number of times an answer must appear before it is indexed and
# considered as a guess candidate.
kMIN_APPEARANCES = 7

# All known feature extractors, keyed by name; values start as None and are
# filled in lazily (ordering is preserved for downstream consumers).
kFEATURES = OrderedDict([("ir", None), ("lm", None), ("deep", None),
    ("answer_present", None), ("text", None),
    ("classifier", None), ("wikilinks", None),
    ])

# Features that can actually generate guesses (as opposed to only scoring
# guesses produced by other features).  Table-driven so that adding an
# extractor is a one-line change instead of another if-statement.
kHAS_GUESSES = set(
    name for name, extractor_class in (
        ("ir", IrExtractor),
        ("lm", LanguageModel),
        ("text", TextExtractor),
        ("deep", DeepExtractor),
        ("classifier", Classifier),
        ("answer_present", AnswerPresent),
    )
    if extractor_class.has_guess())

kGRANULARITIES = ["sentence"]
kFOLDS = ["dev", "devtest", "test"]
kNEGINF = float("-inf")
Example #3
0
def instantiate_feature(feature_name, questions):
    """Build and return the feature extractor registered as *feature_name*.

    @param feature_name: The feature to instantiate ("ir", "text", "lm",
        "deep", "wikilinks", "answer_present", "label", or "classifier")
    @param questions: question database (used by the "deep", "label", and
        "classifier" features)
    @return: the extractor instance, or None for an unknown feature name
    """

    extractor = None
    print("Loading feature %s ..." % feature_name)
    if feature_name == "ir":
        extractor = IrExtractor()
        # One whoosh index per corpus, each with identity normalization
        # (mean 0.0, variance 1.0).
        for corpus in ("wiki", "qb", "source"):
            extractor.add_index("%s_%i" % (corpus, kMIN_APPEARANCES),
                                "%s_%i" % ("data/ir/whoosh_" + corpus,
                                           kMIN_APPEARANCES),
                                0.0, 1.0)
    elif feature_name == "text":
        extractor = TextExtractor()
    elif feature_name == "lm":
        extractor = LanguageModel("data/lm.txt")
        for corpus in ("qb", "wiki", "source"):
            extractor.add_corpus(corpus)
    elif feature_name == "deep":
        # Normalized (lowercase, underscored) title -> canonical page name.
        page_dict = {page.lower().replace(' ', '_'): page
                     for page in questions.get_all_pages()}
        extractor = DeepExtractor("data/deep/classifier",
                                  "data/deep/params",
                                  "data/deep/vocab",
                                  "data/common/ners", page_dict, 200)
    elif feature_name == "wikilinks":
        extractor = WikiLinks()
    elif feature_name == "answer_present":
        extractor = AnswerPresent()
    elif feature_name == "label":
        extractor = Labeler(questions)
    elif feature_name == "classifier":
        extractor = Classifier('data/classifier/bigrams.pkl', questions)
    else:
        print("Don't know what to do with %s" % feature_name)
    print("done")
    return extractor
Example #4
0
def instantiate_feature(feature_name, questions, deep_data="data/deep"):
    """Look up and build the extractor registered under *feature_name*.

    @param feature_name: The feature to instantiate
    @param questions: question database (used by the "deep", "label",
        "classifier", and "mentions" features)
    @param deep_data: directory holding the deep extractor's model files
    @return: the extractor instance, or None for an unknown feature name
    """

    def _ir():
        # One whoosh index per corpus, each with identity normalization
        # (mean 0.0, variance 1.0).
        extractor = IrExtractor()
        for corpus in ("wiki", "qb", "source"):
            extractor.add_index("%s_%i" % (corpus, kMIN_APPEARANCES),
                                "%s_%i" % ("data/ir/whoosh_" + corpus,
                                           kMIN_APPEARANCES),
                                0.0, 1.0)
        return extractor

    def _lm():
        model = LanguageModel("data/language_model")
        for corpus in ("qb", "wiki", "source"):
            model.add_corpus(corpus)
        return model

    def _deep():
        print("from %s" % deep_data)
        # Normalized (lowercase, underscored) title -> canonical page name.
        pages = {page.lower().replace(' ', '_'): page
                 for page in questions.get_all_pages()}
        return DeepExtractor("%s/classifier" % deep_data,
                             "%s/params" % deep_data,
                             "%s/vocab" % deep_data,
                             "data/common/ners", pages, 200)

    # Deferred construction: each entry is a callable, so extractor names
    # are only resolved for the branch actually taken.
    builders = {
        "ir": _ir,
        "text": lambda: TextExtractor(),
        "lm": _lm,
        "deep": _deep,
        "wikilinks": lambda: WikiLinks(),
        "answer_present": lambda: AnswerPresent(),
        "label": lambda: Labeler(questions),
        "classifier": lambda: Classifier('data/classifier/bigrams.pkl',
                                         questions),
        "mentions": lambda: Mentions(questions, kMIN_APPEARANCES),
    }

    feature = None
    print("Loading feature %s ..." % feature_name)
    if feature_name in builders:
        feature = builders[feature_name]()
    else:
        print("Don't know what to do with %s" % feature_name)
    print("done")
    return feature
Example #5
0
from extractors.lm import *
from extractors.deep import *
from extractors.classifier import *
from extractors.wikilinks import *
from extractors.answer_present import AnswerPresent

# Minimum number of times an answer must appear before it is indexed and
# considered as a guess candidate.
kMIN_APPEARANCES = 5

# All known feature extractors, keyed by name; values start as None and are
# filled in lazily (ordering is preserved for downstream consumers).
kFEATURES = OrderedDict([("ir", None), ("lm", None), ("deep", None),
    ("answer_present", None), ("text", None),
    ("classifier", None), ("wikilinks", None),
    ])

# Features that can actually generate guesses (as opposed to only scoring
# guesses produced by other features).  Table-driven so that adding an
# extractor is a one-line change instead of another if-statement.
kHAS_GUESSES = set(
    name for name, extractor_class in (
        ("ir", IrExtractor),
        ("lm", LanguageModel),
        ("text", TextExtractor),
        ("deep", DeepExtractor),
        ("classifier", Classifier),
        ("answer_present", AnswerPresent),
    )
    if extractor_class.has_guess())

kGRANULARITIES = ["sentence"]
kFOLDS = ["dev", "devtest", "test"]
kNEGINF = float("-inf")