def fit(self, tasks):
    """Collect example words for each vowel category from solved tasks.

    Every task is passed through ``standardize_task``. Its category is
    picked from the task text: "alternations" when the text contains
    "чередующаяся", "unverifiable" when it contains "непроверяемая",
    otherwise "verifiable". For each correct choice, every entry of the
    choice's "parts" is cleaned of numbering / parenthesised fragments and
    appended to ``self.known_examples[category]``.

    Args:
        tasks: iterable of task objects accepted by ``standardize_task``.
    """
    # Checked in order; the first marker found in the text wins.
    markers = (
        ("чередующаяся", "alternations"),
        ("непроверяемая", "unverifiable"),
    )
    # Applied one after another, stripping whitespace after each pass:
    # 1) drop a leading "N)" and any "(...)" group with surrounding spaces,
    # 2) drop a leading "N)" that may have surfaced after the first pass,
    # 3) drop a leading "<char>N)" prefix.
    cleanup_patterns = (
        r" *(?:^\d\)|\(.*?\)) *",
        r"^\d\)",
        r"^[\w|\)]\d\)",
    )

    for raw_task in tasks:
        task = standardize_task(raw_task)
        statement = task["text"]
        type_ = next(
            (label for marker, label in markers if marker in statement),
            "verifiable",
        )

        solution = task["solution"]
        if "correct_variants" in solution:
            correct = solution["correct_variants"][0]
        else:
            correct = solution["correct"]

        bucket = self.known_examples[type_]
        for correct_id in correct:
            # Choice ids are 1-based; the choices list is 0-based.
            chosen = task["choices"][int(correct_id) - 1]
            for word in chosen["parts"]:
                cleaned = word.strip()
                for pattern in cleanup_patterns:
                    cleaned = re.sub(pattern, "", cleaned).strip()
                bucket.append(cleaned)
def predict_from_model(self, task):
    """Pick the option(s) whose words all belong to the requested category.

    The category is derived from the task text ("alternations" for
    "чередующаяся", "unverifiable" for "непроверяемая", else "verifiable").
    An option qualifies when ``self.is_of_type(word, category)`` is true
    for every ", "-separated word of its text (leading "N)" removed).

    When the first option holds a single word, one id is returned;
    otherwise two ids are returned (or up to three when exactly two or
    three options qualify). Random guesses among "1".."5" fill the gaps.

    Args:
        task: task object accepted by ``standardize_task``.

    Returns:
        list of option ids.
    """
    task = standardize_task(task)
    statement = task["text"]
    options = task["question"]["choices"]

    if "чередующаяся" in statement:
        wanted = "alternations"
    elif "непроверяемая" in statement:
        wanted = "unverifiable"
    else:
        wanted = "verifiable"

    matching_ids = []
    for option in options:
        words = re.sub(r"^\d\)", "", option["text"]).split(", ")
        if all(self.is_of_type(word, wanted) for word in words):
            matching_ids.append(option["id"])

    single_word_options = ", " not in options[0]["text"]
    found = len(matching_ids)
    # Fallback pool for random guesses: ids "1" through "5".
    fallback_ids = [str(number) for number in range(1, 6)]

    if single_word_options:
        if found == 0:
            return [random.choice(fallback_ids)]
        if found == 1:
            return matching_ids
        return [random.choice(matching_ids)]

    if found == 0:
        return sorted(random.sample(fallback_ids, 2))
    if found == 1:
        # Pair the only match with a random different id.
        others = [
            candidate
            for candidate in fallback_ids
            if candidate != matching_ids[0]
        ]
        return sorted(matching_ids + [random.choice(others)])
    if found in [2, 3]:
        return sorted(matching_ids)
    return sorted(random.sample(matching_ids, 2))
def predict_from_model(self, task):
    """Return the two choice numbers the classifier scores highest.

    Choice texts lose their leading "N)" marker, are converted with
    ``self.strs_to_pos_tags``, vectorised by ``self.count_vectorizer`` and
    scored using column 1 of ``self.classifier.predict_proba``
    (presumably the positive class -- confirm against the classifier).

    Args:
        task: task object accepted by ``standardize_task``.

    Returns:
        Sorted list with the 1-based positions (as strings) of the two
        highest-scoring choices.
    """
    task = standardize_task(task)

    stripped = []
    for choice in task["question"]["choices"]:
        stripped.append(re.sub(r"^\d\) ?", "", choice['text']))

    tagged = self.strs_to_pos_tags(stripped)
    features = self.count_vectorizer.transform(tagged).toarray()
    scores = self.classifier.predict_proba(features)[:, 1]

    # argsort is ascending, so the last two indices are the top scores.
    top_two = np.argsort(scores)[-2:]
    return sorted(str(index + 1) for index in top_two)
def predict_from_model(self, task):
    """Answer a missing-letter task via ``self.get_answer_by_vowel``.

    If the task text names a letter ("буква X" with X an upper-case letter
    from the listed set), the helper's answer for that letter (lower-cased)
    is returned as is. Otherwise, if the text contains
    "одна и та же буква", the helper is queried for each candidate letter
    and the distinct answers are returned sorted. In every other case the
    result is an empty list.

    Args:
        task: task object accepted by ``standardize_task``.

    Returns:
        The helper's result for a named letter, or a sorted list of
        distinct answers.
    """
    task = standardize_task(task)
    statement = task["text"]

    named_letter = re.search(r'буква ([ЭОУАЫЕЁЮЯИ])', statement)
    if named_letter is not None:
        return self.get_answer_by_vowel(
            task["choices"], named_letter.group(1).lower())

    collected = []
    if "одна и та же буква" in statement:
        collected = [
            answer
            for letter in "эоуаыеёюяидтсз"
            for answer in self.get_answer_by_vowel(task["choices"], letter)
        ]
    return sorted(set(collected))
def predict_from_model(self, task):
    """Answer a missing-letter task, trying two strategies in turn.

    First pass (raw task): candidate words are the choice texts for a
    'multiple_choice' question (``num`` = 1), otherwise the lines of the
    task text longer than one character, minus the first (``num`` = 0).
    ``self.get_answer_by_vowel_10`` is used here.

    Second pass (only when the first produced nothing): the task is run
    through ``standardize_task`` and ``self.get_answer_by_vowel`` is
    queried on its choices.

    In either pass, a letter named in the text ("буква X") short-circuits
    and the helper's answer for that letter is returned directly.

    Args:
        task: raw task dict with "text" and "question" entries.

    Returns:
        The helper's result for a named letter; otherwise a sorted list
        of distinct answers when ``num`` is 1, or the collected result
        unchanged when ``num`` is 0.
    """
    text_lines = [
        line for line in task['text'].split("\n") if len(line) > 1
    ]
    if task['question']['type'] == 'multiple_choice':
        num = 1
        words = [choice['text'] for choice in task['question']['choices']]
    else:
        num = 0
        # The first remaining line is skipped (presumably the task
        # statement itself -- confirm against the data format).
        words = text_lines[1:]

    named_letter = re.search(r'буква ([ЭОУАЫЕЁЮЯИ])', task['text'])
    if named_letter:
        return self.get_answer_by_vowel_10(
            words, named_letter.group(1).lower(), num)

    result = []
    if "одна и та же" in task['text']:
        for letter in "эоуаыеёюяидтсзьъ":
            found = self.get_answer_by_vowel_10(words, letter, num)
            if num == 1:
                # Multiple choice: accumulate answers for every letter.
                result.extend(found)
            elif found:
                # Otherwise the first letter that yields an answer wins.
                result = found
                break

    if not result:
        # Nothing found on the raw task: retry on the standardized form.
        result = []
        task = standardize_task(task)
        named_letter = re.search(r'буква ([ЭОУАЫЕЁЮЯИ])', task["text"])
        if named_letter:
            return self.get_answer_by_vowel(
                task["question"]["choices"],
                named_letter.group(1).lower(),
                num)
        if "одна и та же буква" in task["text"]:
            for letter in "эоуаыеёюяидтсз":
                result.extend(self.get_answer_by_vowel(
                    task["question"]["choices"], letter, num))

    if num == 1:
        return sorted(set(result))
    return result
def predict_from_model(self, task):
    """Answer a missing-letter task via ``self.get_answer_by_letter``.

    The task text is first reduced to Cyrillic letters, '.', ',', '!' and
    spaces. If the cleaned text names a letter ("буква X", matched
    case-insensitively), the helper's answer for that letter (lower-cased)
    is returned as is. Otherwise, if the text contains
    "одна и та же буква", the helper is queried for every letter of
    ``self.alphabet`` and the distinct answers are returned sorted. In
    every other case the result is an empty list.

    Args:
        task: task dict with a "text" entry. NOTE: the cleaned text is
            written back into this dict before standardization.

    Returns:
        The helper's result for a named letter, or a sorted list of
        distinct answers.
    """
    # Raw string: the escapes reach the regex engine unchanged and no
    # invalid-escape warning is emitted by the Python compiler.
    task["text"] = re.sub(r"[^а-яА-яЁё\.\,\! ]|_", "", task["text"])
    task = standardize_task(task)

    # "+" instead of "*": with "*" the group could match zero times, making
    # group(1) None (AttributeError on .lower()) and shadowing the
    # "одна и та же буква" branch whenever "буква " was followed by any
    # other character. As with "*", the group holds the last repetition.
    match = re.search(
        r"буква ([ЭОУАЫЕЁЮЯИ])+", task["text"], re.IGNORECASE
    )
    if match:
        letter = match.group(1)
        return self.get_answer_by_letter(
            task["question"]["choices"], letter.lower()
        )

    result = []
    if "одна и та же буква" in task["text"]:
        for letter in self.alphabet:
            result.extend(
                self.get_answer_by_letter(task["question"]["choices"], letter)
            )
    return sorted(set(result))
def fit(self, tasks):
    """Train the vectorizer and classifier on the choices of solved tasks.

    For every task (after ``standardize_task``) each choice text loses its
    leading "N)" marker and is converted with ``self.strs_to_pos_tags``.
    A choice is labelled 1 when its 1-based position (as a string) is
    listed among the correct answers, else 0. The collected samples are
    vectorised with ``self.count_vectorizer``, split 90/10 into train and
    dev parts, and ``self.classifier`` is fitted with the dev part passed
    as ``eval_set``.

    Args:
        tasks: iterable of task objects accepted by ``standardize_task``.
    """
    X, y = [], []
    for task in tasks:
        task = standardize_task(task)
        solution = task["solution"]
        if "correct_variants" in solution:
            correct = solution["correct_variants"][0]
        else:
            # A lone answer is wrapped so the membership test below works
            # on whole ids (assumes "correct" is a single id -- confirm).
            correct = [solution["correct"]]

        sentences = [
            re.sub(r"^\d\) ?", "", sentence["text"])
            for sentence in task["question"]["choices"]
        ]
        tagged = self.strs_to_pos_tags(sentences)

        # One label per produced sample. The previous hard-coded range(5)
        # left X and y misaligned for tasks without exactly five choices.
        for position, tagged_sentence in enumerate(tagged, start=1):
            X.append(tagged_sentence)
            y.append(1 if str(position) in correct else 0)

    X = self.count_vectorizer.fit_transform(X).toarray()
    X_train, X_dev, y_train, y_dev = train_test_split(X, y, train_size=0.9)
    self.classifier.fit(X_train, y_train, eval_set=(X_dev, y_dev))
def predict_from_model(self, task):
    """Rank options by how well their words fit the requested category.

    The category is derived from the task text ("alternations" for
    "чередующаяся", "unverifiable" for "непроверяемая", else "verifiable").
    For each option, every ", "-separated word is tested with
    ``self.is_of_type`` against every key of ``self.known_examples``:
    hits on the requested category count as positive, hits on any other
    as negative. The option's score is the positive count when there are
    no negatives, otherwise minus the negative count. Options are sorted
    by (score, id).

    When the first option holds a single word, the top-ranked id is
    returned; otherwise the top two ids (or all of them when there are
    only two or three options).

    Args:
        task: task object accepted by ``standardize_task``.

    Returns:
        list of option ids.
    """
    task = standardize_task(task)
    statement = task["text"]
    options = task["question"]["choices"]

    if "чередующаяся" in statement:
        wanted = "alternations"
    elif "непроверяемая" in statement:
        wanted = "unverifiable"
    else:
        wanted = "verifiable"

    scored = []
    for option in options:
        words = re.sub(r"^\d\)", "", option["text"]).split(", ")
        hits = 0
        misses = 0
        for word in words:
            for kind in self.known_examples:
                if not self.is_of_type(word, kind):
                    continue
                if kind == wanted:
                    hits += 1
                else:
                    misses += 1
        # Any hit on a foreign category overrides the positive count.
        score = -misses if misses else hits
        scored.append((score, option["id"]))
    scored.sort()

    ranked_ids = [option_id for _, option_id in scored]
    total = len(ranked_ids)
    # Fallback pool for random guesses: ids "1" through "5".
    fallback_ids = [str(number) for number in range(1, 6)]

    if ", " not in options[0]["text"]:
        if total == 0:
            return [random.choice(fallback_ids)]
        if total == 1:
            return [ranked_ids[0]]
        return [ranked_ids[-1]]

    if total == 0:
        return sorted(random.sample(fallback_ids, 2))
    if total == 1:
        others = [
            candidate
            for candidate in fallback_ids
            if candidate != ranked_ids[0]
        ]
        return sorted([ranked_ids[0]] + [random.choice(others)])
    if total in [2, 3]:
        return sorted(ranked_ids)
    return sorted(ranked_ids[-2:])
def fit(self, tasks):
    """Collect example words for each vowel category from solved tasks.

    Every task is passed through ``standardize_task``. Its category comes
    from the first marker found in the task text: "чередующаяся" ->
    "alternations", "непроверяемая" -> "unverifiable", "проверяемая" ->
    "verifiable". Tasks containing none of the markers are skipped. For
    each correct choice, every entry of its "parts" has a leading "N)" and
    any "(...)" groups removed and is appended to
    ``self.known_examples[category]``.

    Args:
        tasks: iterable of task objects accepted by ``standardize_task``.

    Returns:
        self.
    """
    # Order matters: "непроверяемая" contains "проверяемая", so the longer
    # marker has to be tested first.
    markers = (
        ("чередующаяся", "alternations"),
        ("непроверяемая", "unverifiable"),
        ("проверяемая", "verifiable"),
    )

    for raw_task in tasks:
        task = standardize_task(raw_task)
        statement = task["text"]
        type_ = next(
            (label for marker, label in markers if marker in statement),
            None,
        )
        if type_ is None:
            continue

        solution = task["solution"]
        if "correct_variants" in solution:
            correct = solution["correct_variants"][0]
        else:
            correct = solution["correct"]

        bucket = self.known_examples[type_]
        for correct_id in correct:
            # Choice ids are 1-based; the choices list is 0-based.
            chosen = task["choices"][int(correct_id) - 1]
            for word in chosen["parts"]:
                bucket.append(re.sub(r" *(?:^\d\)|\(.*?\)) *", "", word))
    return self