def __init__(self, corpus=orch.orchid_corpus(), custom_dict=dict()):
    """Store the corpus, create the word processor and pick a dictionary.

    Args:
        corpus: Corpus object kept on the instance as ``self.corpus``.
        custom_dict: Object kept as ``self.custom_dict`` and handed to
            ``self.set_custom_dict``. Passing ``None`` explicitly skips
            that call and sets ``self.dict_name`` to
            ``"lexitron_original.txt"`` instead.
    """
    # NOTE(review): both defaults are evaluated once, when this function is
    # defined, so every instance created without arguments shares the same
    # corpus object and the same dict object. The signature is kept as-is
    # for backward compatibility; avoid mutating ``self.custom_dict`` in
    # place unless that sharing is intended.
    self.corpus = corpus
    self.wp = wp.word_processing()
    self.custom_dict = custom_dict

    # Identity check: ``!= None`` would go through the argument's __ne__,
    # which is not a reliable "was a value supplied" test.
    if custom_dict is not None:
        self.set_custom_dict(custom_dict)
    else:
        self.dict_name = "lexitron_original.txt"
def question_orchid(test_set):
    """Plot question acceptability against the ratio of words known to orchid.

    For each item in ``test_set`` this computes the percentage of entries in
    ``item.sentence.pos`` whose first element passes the orchid corpus
    ``exists`` check. The percentage is filed under "good" when
    ``item.get_average_eval()`` is at least 2.0 and under "bad" otherwise,
    and both groups are passed to ``plot_histogram``.

    Items whose sentence has no entries are skipped: their ratio is
    undefined, and dividing by the zero length would raise
    ``ZeroDivisionError`` and abort the whole plot.

    Args:
        test_set: Iterable of question items exposing ``sentence.pos`` and
            ``get_average_eval()``.
    """
    ratios_by_label = {"good": [], "bad": []}
    orchid = orchid_corpus.orchid_corpus()

    for item in test_set:
        tagged_words = item.sentence.pos
        word_total = len(tagged_words)
        if word_total == 0:
            # No words means no meaningful ratio; leave the item out.
            continue

        known_total = sum(orchid.exists(word[0]) for word in tagged_words)
        known_ratio = (known_total / word_total) * 100

        label = "good" if item.get_average_eval() >= 2.0 else "bad"
        ratios_by_label[label].append(known_ratio)

    plot_histogram(
        ratios_by_label["good"],
        ratios_by_label["bad"],
        x_range=(0, 100),
        bins_count=10,
        x_label="ratio of known words (in orchid)",
        graph_name="Question acceptability compared to ratio of known words in orchid",
    )
from statistics import mean import preprocess.orchid_corpus as orchid_corpus import preprocess.sentence as _sentence import choices.wordnet.wn_tree as wn_tree import choices.word_item as _word_item import ast wn = wn_tree.wordnet_tree() orchid = orchid_corpus.orchid_corpus() class question_item: def __init__(self, *args, **kwargs): if len(args) == 5: (sentence, sentence_no, question, answer, answer_index) = args self.sentence = sentence self.sentence_no = sentence_no self.question = question self.answer = answer self.answer_index = answer_index self.choices = None self.asked_choices = None elif "from_str" in kwargs: attributes = ast.literal_eval(kwargs["from_str"]) for key in attributes: if key == "sentence": self.sentence = _sentence.sentence(from_str=attributes["sentence"]) elif key == "choices": self.choices = [_word_item.word_item(from_str=choice_str) for choice_str in attributes["choices"]]