예제 #1
0
    def fit(self, tasks):
        """Collect cleaned words from the correct choices of each task.

        Every task is passed through ``standardize_task``. The task text decides
        which list in ``self.known_examples`` receives the words:
        ``"alternations"`` when it contains "чередующаяся", ``"unverifiable"``
        when it contains "непроверяемая", and ``"verifiable"`` otherwise.

        Args:
            tasks: iterable of task objects accepted by ``standardize_task``.
                Presumably dicts with "text", "solution" and "choices" after
                standardization -- verify against ``standardize_task``.
        """
        alternating_marker = "чередующаяся"
        unverifiable_marker = "непроверяемая"
        for raw_task in tasks:
            task = standardize_task(raw_task)
            statement = task["text"]

            if alternating_marker in statement:
                bucket = "alternations"
            elif unverifiable_marker in statement:
                bucket = "unverifiable"
            else:
                bucket = "verifiable"

            # Prefer the first listed variant when several are given.
            if "correct_variants" in task["solution"]:
                correct_ids = task["solution"]["correct_variants"][0]
            else:
                correct_ids = task["solution"]["correct"]

            for correct_id in correct_ids:
                # Choice ids are 1-based; the choices list is 0-based.
                chosen = task["choices"][int(correct_id) - 1]
                for word in chosen["parts"]:
                    cleaned = word.strip()
                    # Drop a leading "N)" marker and any parenthesised remark.
                    cleaned = re.sub(
                        r" *(?:^\d\)|\(.*?\)) *", "", cleaned
                    ).strip()
                    # A second leading "N)" may surface after the first pass.
                    cleaned = re.sub(r"^\d\)", "", cleaned).strip()
                    # Finally drop a leading "<char>N)" prefix.
                    cleaned = re.sub(r"^[\w|\)]\d\)", "", cleaned).strip()
                    self.known_examples[bucket].append(cleaned)
예제 #2
0
 def predict_from_model(self, task):
     """Pick answer ids whose words all match the type named in the task text.

     The type is "alternations" if the text contains "чередующаяся",
     "unverifiable" if it contains "непроверяемая", else "verifiable".
     An option qualifies when ``self.is_of_type`` accepts every
     comma-separated word in it.

     When the first option holds a single word, one id is returned;
     otherwise two or three ids are returned. Gaps are filled with random
     picks from "1".."5".

     Returns:
         list of option ids (sorted in the multi-word case).
     """
     task = standardize_task(task)
     text = task["text"]
     choices = task["question"]["choices"]

     if "чередующаяся" in text:
         type_ = "alternations"
     elif "непроверяемая" in text:
         type_ = "unverifiable"
     else:
         type_ = "verifiable"

     matching_ids = [
         option["id"]
         for option in choices
         if all(
             self.is_of_type(word, type_)
             for word in re.sub(r"^\d\)", "", option["text"]).split(", ")
         )
     ]
     hits = len(matching_ids)

     # No ", " in the first option means each option is a single word.
     if choices[0]["text"].count(", ") == 0:
         if hits == 0:
             return [random.choice([str(n + 1) for n in range(5)])]
         if hits == 1:
             return matching_ids
         return [random.choice(matching_ids)]

     if hits == 0:
         return sorted(random.sample([str(n + 1) for n in range(5)], 2))
     if hits == 1:
         # Pad the single match with one random different id.
         others = [
             str(n + 1) for n in range(5) if str(n + 1) != matching_ids[0]
         ]
         return sorted(matching_ids + [random.choice(others)])
     if hits in [2, 3]:
         return sorted(matching_ids)
     return sorted(random.sample(matching_ids, 2))
예제 #3
0
 def predict_from_model(self, task):
     """Return the two choice numbers with the highest classifier score.

     Each choice text has its leading "N)" marker removed, is converted by
     ``self.strs_to_pos_tags``, vectorized with ``self.count_vectorizer``
     and scored by ``self.classifier.predict_proba`` (column 1 is used).

     Returns:
         list of two 1-based choice numbers as strings, sorted.
     """
     task = standardize_task(task)

     stripped = []
     for choice in task["question"]["choices"]:
         stripped.append(re.sub(r"^\d\) ?", "", choice['text']))

     tagged = self.strs_to_pos_tags(stripped)
     features = self.count_vectorizer.transform(tagged).toarray()
     scores = self.classifier.predict_proba(features)[:, 1]

     # argsort is ascending, so the last two indices are the top scorers.
     top_indices = np.argsort(scores)[-2:]
     answer = [str(index + 1) for index in top_indices]
     answer.sort()
     return answer
예제 #4
0
 def predict_from_model(self, task):
     """Answer a task that names a letter, or asks for "the same letter".

     If the text contains "буква " followed by one of the listed uppercase
     vowels, the result of ``self.get_answer_by_vowel`` for that letter
     (lowercased) is returned as is. If the text instead contains
     "одна и та же буква", every candidate letter is tried and the merged
     answers are returned de-duplicated and sorted. Otherwise the result is
     an empty list.
     """
     task = standardize_task(task)

     named_letter = re.search(r'буква ([ЭОУАЫЕЁЮЯИ])', task["text"])
     if named_letter:
         return self.get_answer_by_vowel(
             task["choices"], named_letter.group(1).lower())

     collected = []
     if "одна и та же буква" in task["text"]:
         for candidate in "эоуаыеёюяидтсз":
             collected.extend(
                 self.get_answer_by_vowel(task["choices"], candidate))
     return sorted(set(collected))
예제 #5
0
    def predict_from_model(self, task):
        """Answer a letter task, trying the raw task first, then the standardized one.

        Stage 1 works on the raw task. For a 'multiple_choice' question the
        candidate strings are the choice texts (``num`` = 1); otherwise they
        are the lines of the task text longer than one character, minus the
        first such line (``num`` = 0). Candidates go to
        ``self.get_answer_by_vowel_10``.

        Stage 2 runs only when stage 1 produced nothing: the task is passed
        through ``standardize_task`` and ``self.get_answer_by_vowel`` is used
        on its choices.

        Returns:
            Whatever the helper returns when the text names a letter
            explicitly; otherwise the collected answers (de-duplicated and
            sorted when ``num`` is 1, unchanged when it is 0).
        """
        text = task['text']
        if task['question']['type'] == 'multiple_choice':
            num = 1
            words = [choice['text'] for choice in task['question']['choices']]
        else:
            num = 0
            # Keep lines longer than one character, then skip the first one.
            words = [line for line in text.split("\n") if len(line) > 1][1:]

        named_letter = re.search(r'буква ([ЭОУАЫЕЁЮЯИ])', text)
        if named_letter:
            return self.get_answer_by_vowel_10(
                words, named_letter.group(1).lower(), num)

        result = []
        if "одна и та же" in text:
            for candidate in "эоуаыеёюяидтсзьъ":
                found = self.get_answer_by_vowel_10(words, candidate, num)
                if num == 1:
                    # Multiple choice: accumulate hits for every letter.
                    result.extend(found)
                elif found:
                    # Otherwise the first letter with a non-empty answer wins.
                    result = found
                    break

        if not result:
            # Fallback: retry on the standardized task with the other helper.
            task = standardize_task(task)
            result = []
            named_letter = re.search(r'буква ([ЭОУАЫЕЁЮЯИ])', task["text"])
            if named_letter:
                return self.get_answer_by_vowel(task["question"]["choices"],
                                                named_letter.group(1).lower(),
                                                num)
            if "одна и та же буква" in task["text"]:
                for candidate in "эоуаыеёюяидтсз":
                    result.extend(self.get_answer_by_vowel(
                        task["question"]["choices"], candidate, num))

        if num == 1:
            return sorted(set(result))
        return result
예제 #6
0
 def predict_from_model(self, task):
     """Answer a task that names a letter, or asks for "the same letter".

     The task text is first reduced to Cyrillic letters, ".", ",", "!" and
     spaces. NOTE: this overwrites ``task["text"]`` on the dict passed in by
     the caller, before ``standardize_task`` is applied.

     If the text contains "буква " followed by one of the listed vowels
     (case-insensitive), the result of ``self.get_answer_by_letter`` for
     that letter (lowercased) is returned as is. Otherwise, if the text
     contains "одна и та же буква", every letter of ``self.alphabet`` is
     tried and the merged answers are returned de-duplicated and sorted.
     In all other cases an empty list is returned.

     Args:
         task: mutable mapping with at least a "text" entry.

     Returns:
         list of answers as produced by ``self.get_answer_by_letter``.
     """
     # Raw string: same pattern value as before, without invalid-escape warnings.
     task["text"] = re.sub(r"[^а-яА-яЁё\.\,\! ]|_", "", task["text"])
     task = standardize_task(task)

     match = re.search(r"буква ([ЭОУАЫЕЁЮЯИ])*", task["text"], re.IGNORECASE)
     # The "*" lets the pattern match "буква " with no vowel after it, in
     # which case group(1) is None. Treat that as "no letter named" rather
     # than crashing on None.lower().
     letter = match.group(1) if match else None
     if letter is not None:
         return self.get_answer_by_letter(
             task["question"]["choices"], letter.lower()
         )

     result = []
     if "одна и та же буква" in task["text"]:
         for candidate in self.alphabet:
             result.extend(
                 self.get_answer_by_letter(
                     task["question"]["choices"], candidate
                 )
             )
     return sorted(set(result))
예제 #7
0
 def fit(self, tasks):
     """Fit the vectorizer and classifier on per-choice examples.

     Each choice of each task becomes one training row: its text has the
     leading "N)" marker removed and is converted by
     ``self.strs_to_pos_tags``. The label is 1 when the choice's 1-based
     number (as a string) is in the task's correct answers, else 0.

     One label is emitted per choice, so features and labels stay aligned
     for tasks that do not have exactly five choices.
     NOTE(review): assumes ``strs_to_pos_tags`` returns one item per input
     sentence -- confirm.

     Args:
         tasks: iterable of task objects accepted by ``standardize_task``.
     """
     X, y = [], []
     for task in tasks:
         task = standardize_task(task)
         # Prefer the first listed variant; otherwise wrap the single answer.
         correct = (task["solution"]["correct_variants"][0]
                    if "correct_variants" in task["solution"] else
                    [task["solution"]["correct"]])
         choices = task["question"]["choices"]
         sentences = [
             re.sub(r"^\d\) ?", "", choice["text"]) for choice in choices
         ]
         X.extend(self.strs_to_pos_tags(sentences))
         y.extend(
             1 if str(number) in correct else 0
             for number in range(1, len(choices) + 1)
         )
     X = self.count_vectorizer.fit_transform(X).toarray()
     # Hold out 10% of the rows as the evaluation set passed to the classifier.
     X_train, X_dev, y_train, y_dev = train_test_split(X, y, train_size=0.9)
     self.classifier.fit(X_train, y_train, eval_set=(X_dev, y_dev))
예제 #8
0
 def predict_from_model(self, task):
     """Rank options by how well their words match the type named in the text.

     The type is "alternations" if the text contains "чередующаяся",
     "unverifiable" if it contains "непроверяемая", else "verifiable".
     Each word of an option is checked with ``self.is_of_type`` against
     every key of ``self.known_examples``. An option scores its number of
     matches for the wanted type, or minus its number of matches for other
     types if there are any.

     When the first option holds a single word, the best-scoring id is
     returned; otherwise all ids (for two or three options) or the two
     best-scoring ids are returned, sorted.
     """
     task = standardize_task(task)
     text = task["text"]
     choices = task["question"]["choices"]

     if "чередующаяся" in text:
         type_ = "alternations"
     elif "непроверяемая" in text:
         type_ = "unverifiable"
     else:
         type_ = "verifiable"

     scored = []
     for option in choices:
         words = re.sub(r"^\d\)", "", option["text"]).split(", ")
         wanted_hits = 0
         other_hits = 0
         for word in words:
             for known_type in self.known_examples:
                 if not self.is_of_type(word, known_type):
                     continue
                 if known_type == type_:
                     wanted_hits += 1
                 else:
                     other_hits += 1
         # Any match for another type turns the score negative.
         score = wanted_hits if other_hits == 0 else -other_hits
         scored.append((score, option["id"]))
     # Ascending by (score, id): the best option ends up last.
     scored.sort()
     total = len(scored)

     # No ", " in the first option means each option is a single word.
     if choices[0]["text"].count(", ") == 0:
         if total == 0:
             return [random.choice([str(n + 1) for n in range(5)])]
         if total == 1:
             return [scored[0][1]]
         return [scored[-1][1]]

     if total == 0:
         return sorted(random.sample([str(n + 1) for n in range(5)], 2))
     if total == 1:
         only_id = scored[0][1]
         others = [str(n + 1) for n in range(5) if str(n + 1) != only_id]
         return sorted([only_id] + [random.choice(others)])
     if total in [2, 3]:
         return sorted(entry[1] for entry in scored)
     return sorted(entry[1] for entry in scored[-2:])
예제 #9
0
    def fit(self, tasks):
        """Collect cleaned words from the correct choices of each task.

        Every task is passed through ``standardize_task``. The first marker
        found in the task text selects the list in ``self.known_examples``:
        "чередующаяся" -> "alternations", "непроверяемая" -> "unverifiable",
        "проверяемая" -> "verifiable". Tasks with none of the markers are
        skipped.

        Args:
            tasks: iterable of task objects accepted by ``standardize_task``.

        Returns:
            self
        """
        # Order matters: "непроверяемая" contains "проверяемая", so the
        # longer marker has to be tested first.
        marker_to_type = (
            ("чередующаяся", "alternations"),
            ("непроверяемая", "unverifiable"),
            ("проверяемая", "verifiable"),
        )
        for raw_task in tasks:
            task = standardize_task(raw_task)
            text = task["text"]

            type_ = None
            for marker, label in marker_to_type:
                if marker in text:
                    type_ = label
                    break
            if type_ is None:
                continue

            # Prefer the first listed variant when several are given.
            if "correct_variants" in task["solution"]:
                correct_ids = task["solution"]["correct_variants"][0]
            else:
                correct_ids = task["solution"]["correct"]

            for correct_id in correct_ids:
                # Choice ids are 1-based; the choices list is 0-based.
                chosen = task["choices"][int(correct_id) - 1]
                for word in chosen["parts"]:
                    # Drop a leading "N)" marker and any parenthesised remark.
                    cleaned = re.sub(r" *(?:^\d\)|\(.*?\)) *", "", word)
                    self.known_examples[type_].append(cleaned)
        return self