def instantiate_feature(feature_name, questions):
    """
    Build the feature extractor registered under feature_name.

    @param feature_name: The feature to instantiate
    @param questions: question database (read by the "deep", "label" and
        "classifier" features)
    @return: the extractor object, or None when feature_name is not one of
        the names handled below
    """
    feature = None
    print("Loading feature %s ..." % feature_name)

    if feature_name == "ir":
        feature = IrExtractor()
        # Mean/variance arguments handed to add_index for each index
        # (presumably used to standardize IR scores -- confirm in IrExtractor).
        wiki_mean = 0.0
        wiki_var = 1.0
        qb_mean = 0.0
        qb_var = 1.0
        feature.add_index("wiki_%i" % kMIN_APPEARANCES,
                          "%s_%i" % ("data/ir/whoosh_wiki", kMIN_APPEARANCES),
                          wiki_mean, wiki_var)
        feature.add_index("qb_%i" % kMIN_APPEARANCES,
                          "%s_%i" % ("data/ir/whoosh_qb", kMIN_APPEARANCES),
                          qb_mean, qb_var)
    elif feature_name == "text":
        feature = TextExtractor()
    elif feature_name == "lm":
        # Pickles are binary data: open in "rb" (text mode breaks on
        # Python 3) and close the handle as soon as the object is loaded.
        # NOTE(review): unpickling can execute arbitrary code, so
        # data/lm.pkl must come from a trusted source.
        with open("data/lm.pkl", "rb") as lm_file:
            feature = pickle.load(lm_file)
    elif feature_name == "deep":
        # Map the lower-cased, underscore-joined page title back to the
        # original title.
        page_dict = {}
        for page in questions.get_all_pages():
            page_dict[page.lower().replace(' ', '_')] = page
        feature = DeepExtractor("data/deep/classifier",
                                "data/deep/params", "data/deep/vocab",
                                "data/common/ners", page_dict, 200)
    elif feature_name == "wikilinks":
        feature = WikiLinks()
    elif feature_name == "answer_present":
        feature = AnswerPresent()
    elif feature_name == "label":
        feature = Labeler(questions)
    elif feature_name == "classifier":
        feature = Classifier('data/classifier/bigrams.pkl', questions)
    else:
        # Unknown names are reported but not fatal; the caller gets None.
        print("Don't know what to do with %s" % feature_name)

    print("done")
    return feature
from extractors.lm import *
from extractors.deep import *
from extractors.classifier import *
from extractors.wikilinks import *
from extractors.answer_present import AnswerPresent

# Integer constant shared by the feature setup code in this module.
kMIN_APPEARANCES = 7

# Ordered registry of feature names; every slot starts out as None.
kFEATURES = OrderedDict(
    (feature_name, None)
    for feature_name in ("ir", "lm", "deep", "answer_present",
                         "text", "classifier", "wikilinks")
)

# Add features that actually guess
# TODO: Make this less cumbersome
kHAS_GUESSES = {
    feature_name
    for feature_name, extractor_class in (
        ("ir", IrExtractor),
        ("lm", LanguageModel),
        ("text", TextExtractor),
        ("deep", DeepExtractor),
        ("classifier", Classifier),
        ("answer_present", AnswerPresent),
    )
    if extractor_class.has_guess()
}

kGRANULARITIES = ["sentence"]
kFOLDS = ["dev", "devtest", "test"]
kNEGINF = float("-inf")
def instantiate_feature(feature_name, questions):
    """
    Create the extractor object that corresponds to a feature name.

    @param feature_name: The feature to instantiate
    @param questions: question database
    @return: the new extractor, or None if the name is not handled here
    """

    def make_ir():
        extractor = IrExtractor()
        # (index name template, index location prefix, mean, variance)
        index_specs = (
            ("wiki_%i", "data/ir/whoosh_wiki", 0.0, 1.0),
            ("qb_%i", "data/ir/whoosh_qb", 0.0, 1.0),
            ("source_%i", "data/ir/whoosh_source", 0.0, 1.0),
        )
        for name_template, location, mean, var in index_specs:
            extractor.add_index(name_template % kMIN_APPEARANCES,
                                "%s_%i" % (location, kMIN_APPEARANCES),
                                mean, var)
        return extractor

    def make_lm():
        model = LanguageModel("data/lm.txt")
        for corpus in ("qb", "wiki", "source"):
            model.add_corpus(corpus)
        return model

    def make_deep():
        # Normalized key (lower case, underscores) -> original page title.
        page_dict = {title.lower().replace(' ', '_'): title
                     for title in questions.get_all_pages()}
        return DeepExtractor("data/deep/classifier", "data/deep/params",
                             "data/deep/vocab", "data/common/ners",
                             page_dict, 200)

    # Checked in order; constructors are wrapped so nothing is built (or even
    # looked up) until the matching name is found.
    builders = (
        ("ir", make_ir),
        ("text", lambda: TextExtractor()),
        ("lm", make_lm),
        ("deep", make_deep),
        ("wikilinks", lambda: WikiLinks()),
        ("answer_present", lambda: AnswerPresent()),
        ("label", lambda: Labeler(questions)),
        ("classifier",
         lambda: Classifier('data/classifier/bigrams.pkl', questions)),
    )

    print("Loading feature %s ..." % feature_name)
    feature = None
    for known_name, build in builders:
        if feature_name == known_name:
            feature = build()
            break
    else:
        print("Don't know what to do with %s" % feature_name)
    print("done")
    return feature
def instantiate_feature(feature_name, questions, deep_data="data/deep"):
    """
    Construct the extractor that goes with the requested feature name.

    @param feature_name: The feature to instantiate
    @param questions: question database
    @param deep_data: directory prefix for the classifier, params and vocab
        paths given to DeepExtractor (only read for the "deep" feature)
    @return: the extractor, or None when the name is not handled here
    """

    def build():
        # One early return per known feature name.
        if feature_name == "ir":
            ir = IrExtractor()
            # Each index is registered with mean 0.0 and variance 1.0.
            ir.add_index("wiki_%i" % kMIN_APPEARANCES,
                         "%s_%i" % ("data/ir/whoosh_wiki", kMIN_APPEARANCES),
                         0.0, 1.0)
            ir.add_index("qb_%i" % kMIN_APPEARANCES,
                         "%s_%i" % ("data/ir/whoosh_qb", kMIN_APPEARANCES),
                         0.0, 1.0)
            ir.add_index("source_%i" % kMIN_APPEARANCES,
                         "%s_%i" % ("data/ir/whoosh_source", kMIN_APPEARANCES),
                         0.0, 1.0)
            return ir

        if feature_name == "text":
            return TextExtractor()

        if feature_name == "lm":
            lm = LanguageModel("data/language_model")
            lm.add_corpus("qb")
            lm.add_corpus("wiki")
            lm.add_corpus("source")
            return lm

        if feature_name == "deep":
            print("from %s" % deep_data)
            # Normalized key (lower case, underscores) -> original title.
            pages = {}
            for title in questions.get_all_pages():
                normalized = title.lower().replace(' ', '_')
                pages[normalized] = title
            classifier_path = "%s/classifier" % deep_data
            params_path = "%s/params" % deep_data
            vocab_path = "%s/vocab" % deep_data
            return DeepExtractor(classifier_path, params_path, vocab_path,
                                 "data/common/ners", pages, 200)

        if feature_name == "wikilinks":
            return WikiLinks()

        if feature_name == "answer_present":
            return AnswerPresent()

        if feature_name == "label":
            return Labeler(questions)

        if feature_name == "classifier":
            return Classifier('data/classifier/bigrams.pkl', questions)

        if feature_name == "mentions":
            return Mentions(questions, kMIN_APPEARANCES)

        print("Don't know what to do with %s" % feature_name)
        return None

    print("Loading feature %s ..." % feature_name)
    feature = build()
    print("done")
    return feature
from extractors.lm import *
from extractors.deep import *
from extractors.classifier import *
from extractors.wikilinks import *
from extractors.answer_present import AnswerPresent

# Integer constant shared by the feature setup code in this module.
kMIN_APPEARANCES = 5

# Ordered registry of feature names; every slot starts out as None.
kFEATURES = OrderedDict(
    (feature_name, None)
    for feature_name in ("ir", "lm", "deep", "answer_present",
                         "text", "classifier", "wikilinks")
)

# Add features that actually guess
# TODO: Make this less cumbersome
kHAS_GUESSES = {
    feature_name
    for feature_name, extractor_class in (
        ("ir", IrExtractor),
        ("lm", LanguageModel),
        ("text", TextExtractor),
        ("deep", DeepExtractor),
        ("classifier", Classifier),
        ("answer_present", AnswerPresent),
    )
    if extractor_class.has_guess()
}

kGRANULARITIES = ["sentence"]
kFOLDS = ["dev", "devtest", "test"]
kNEGINF = float("-inf")