Example #1
0
    def __init__(self, bert: Optional[Bert] = None):
        """Store the Bert wrapper and create the ``'task17'`` classifier.

        Args:
            bert: Bert instance to reuse. When ``None`` a fresh ``Bert()``
                is constructed.
        """
        self.bert = Bert() if bert is None else bert
        self.model = LgbClassifier('task17')
Example #2
0
    def __init__(self, bert: Optional[Bert] = None, speller: Optional[Speller] = None):
        """Store the helpers this solver works with.

        Args:
            bert: Bert instance to reuse; ``Bert()`` is created when ``None``.
            speller: Speller instance to reuse; ``Speller()`` is created
                when ``None``.
        """
        # Bert is resolved first, then the speller (same order as arguments).
        self.bert: Bert = bert if bert is not None else Bert()
        self.speller: Speller = speller if speller is not None else Speller()
Example #3
0
    def __init__(self, bert: Optional[Bert] = None, sentence_encoder: Optional[SentenceEncoder] = None):
        """Store the helpers this solver works with.

        Args:
            bert: Bert instance to reuse; ``Bert()`` is created when ``None``.
            sentence_encoder: SentenceEncoder instance to reuse;
                ``SentenceEncoder()`` is created when ``None``.
        """
        # Bert is resolved first, then the sentence encoder.
        self.bert: Bert = bert if bert is not None else Bert()
        self.sentence_encoder: SentenceEncoder = (
            sentence_encoder if sentence_encoder is not None else SentenceEncoder()
        )
Example #4
0
    def __init__(self,
                 bert: Optional[Bert] = None,
                 speller: Optional[Speller] = None):
        """Store the helpers and create the ``'task25'`` classifier.

        Args:
            bert: Bert instance to reuse; ``Bert()`` is created when ``None``.
            speller: Speller instance to reuse; ``Speller()`` is created
                when ``None``.
        """
        self.bert: Bert = bert if bert is not None else Bert()
        self.speller: Speller = speller if speller is not None else Speller()

        # The CatBoost-style classifier is the active one; the LightGBM
        # variant is kept as a commented-out alternative.
        # self.model = LgbClassifier('task25')
        self.model = CtbClassifier('task25')
Example #5
0
    def __add_default_solvers(self):
        """Create any missing shared helpers and register the default solvers.

        Helpers that are still ``None`` on the instance are instantiated
        first, in dependency order (the sentence encoder before the essay
        loader, the speller before the syntax parser). Afterwards one solver
        per supported task is built from those helpers and passed to
        ``add_solver`` immediately after construction.
        """
        # --- shared helpers: only filled in when the caller supplied none ---
        if self.bert is None:
            self.bert = Bert()
        if self.speller is None:
            self.speller = Speller()
        if self.stress is None:
            self.stress = Stress()
        if self.sentence_encoder is None:
            self.sentence_encoder = SentenceEncoder()
        if self.esse is None:
            # Needs the sentence encoder resolved just above.
            self.esse = EsseLoader(sentence_encoder=self.sentence_encoder)
        if self.ner is None:
            self.ner = NER()
        if self.syntax is None:
            # Needs the speller resolved above.
            self.syntax = SyntaxParser(self.speller)
        if self.sberbank_solver is None:
            self.sberbank_solver = SberbankSolver()

        # Alternative baseline solvers, currently disabled:
        # self.add_solver(ZeroSolver())
        # self.add_solver(DummySolver())
        # self.add_solver(ZeroSberbankSolver(task_types=[
        #     Task24,
        # ]))

        # --- solver registration, one call per solver, order preserved ---
        register = self.add_solver
        bert, speller = self.bert, self.speller
        stress, syntax = self.stress, self.syntax
        encoder, sberbank = self.sentence_encoder, self.sberbank_solver

        register(sberbank)
        register(Task01Solver(bert, encoder))
        register(Task02Solver(bert, speller))
        register(Task03Solver(bert, encoder))
        register(Task04Solver(stress))
        register(Task05Solver(bert, speller))
        register(Task06Solver(bert, speller, sberbank))
        register(Task07Solver(speller))
        register(Task08Solver(bert, speller, syntax))
        register(Task09Solver(speller, stress))
        register(Task10Solver(speller))
        register(Task13Solver(bert, speller))
        register(Task14Solver(bert, speller))
        register(Task15Solver(bert, speller))
        register(Task16Solver(bert))
        register(Task17Solver(bert))
        register(Task21Solver(syntax, sberbank))
        register(Task25Solver(bert, speller))
        register(Task27Solver(self.esse, self.ner, sberbank, syntax))
Example #6
0
    def __init__(self, bert: Optional[Bert] = None, speller: Optional[Speller] = None):
        """Store the helpers and load the pickled paronym dictionary.

        Args:
            bert: Bert instance to reuse; ``Bert()`` is created when ``None``.
            speller: Speller instance to reuse; ``Speller()`` is created
                when ``None``.
        """
        self.bert: Bert = bert if bert is not None else Bert()
        self.speller: Speller = speller if speller is not None else Speller()

        # The model directory is located relative to this source file.
        here = os.path.dirname(__file__)
        model_dir = os.path.abspath(here + '/../../var/model')
        paronyms_path = model_dir + '/paronyms.pickle'
        with open(paronyms_path, 'rb') as fh:
            self.paronyms: Dict[str, str] = pickle.load(fh)
Example #7
0
    def __init__(self,
                 bert: Optional[Bert] = None,
                 speller: Optional[Speller] = None,
                 sberbank_solver: Optional[SberbankSolver] = None):
        """Store the helpers, load the pickled data and build ``pos_pairs``.

        Args:
            bert: Bert instance to reuse; ``Bert()`` is created when ``None``.
            speller: Speller instance to reuse; ``Speller()`` is created
                when ``None``.
            sberbank_solver: SberbankSolver instance to reuse;
                ``SberbankSolver()`` is created when ``None``.
        """
        self.bert: Bert = bert if bert is not None else Bert()
        self.speller: Speller = speller if speller is not None else Speller()
        self.sberbank_solver: SberbankSolver = (
            sberbank_solver if sberbank_solver is not None else SberbankSolver()
        )

        self.embs = self.load_pkl('task06')

        # Collect the result of ``get_pos_pair`` for every example text; the
        # second element of each ``EXAMPLES`` entry is not needed here.
        self.pos_pairs = set()
        self.pos_pairs.update(
            self.get_pos_pair(example_text) for example_text, _ in self.EXAMPLES
        )
Example #8
0
from flask import Flask, request, jsonify
from lib.util.bert import Bert
from lib.util.sentence_encoder import SentenceEncoder
from lib.solver import SberbankSolver
from lib.ololosh import OloloshAI
from lib.sberbank.utils import rus_tok


# Module-level setup: the shared helpers are created once at import time and
# handed to the OloloshAI instance that serves every request.

# Run one throwaway call on the literal input 'warmup' — presumably so that
# any lazy initialisation happens at startup rather than on the first
# request (TODO confirm against Bert.eval).
bert = Bert()
bert.eval('warmup')

# Same throwaway call for the sentence encoder.
sentence_encoder = SentenceEncoder()
sentence_encoder.encode(['warmup'])

sberbank_solver = SberbankSolver()

# The AI receives the already-created helpers instead of building its own.
ai = OloloshAI(bert=bert, sentence_encoder=sentence_encoder, sberbank_solver=sberbank_solver)
app = Flask(__name__)


@app.route('/ready')
def http_ready():
    """Handle ``/ready``: always answer with the plain string ``'OK'``."""
    return 'OK'


@app.route('/take_exam', methods=['POST'])
def http_take_exam():
    """Handle ``POST /take_exam``: pass the posted tasks to the AI.

    The JSON request body must contain a ``'tasks'`` key; its value is
    handed to ``ai.take_exam``.
    """
    request_data = request.get_json()
    tasks = request_data['tasks']
    answers = ai.take_exam(tasks)
    # NOTE(review): no return statement is visible in this excerpt — the
    # response construction is presumably cut off; confirm against the
    # full file.
Example #9
0
 def __init__(self, bert: Optional[Bert] = None):
     """Store the Bert wrapper, creating ``Bert()`` when none is given."""
     self.bert: Bert = bert if bert is not None else Bert()
Example #10
0
class Task17Solver(BaseSolver):
    """Solver for tasks of type ``Task.TYPE_17``.

    Each selected position of a task is represented by the Bert embedding of
    its token and scored by the ``'task17'`` classifier. The answer consists
    of the 1-based numbers of the positions whose score exceeds 0.5.
    """

    def __init__(self, bert: Optional[Bert] = None):
        """Store the Bert wrapper and create the classifier.

        Args:
            bert: Bert instance to reuse; ``Bert()`` is created when ``None``.
        """
        if bert is not None:
            self.bert = bert
        else:
            self.bert = Bert()

        self.model = LgbClassifier('task17')
        # self.model = CtbClassifier('task17')

    def get_task_type(self) -> str:
        """Return the task type this solver handles."""
        return Task.TYPE_17

    def solve(self, task: Task17) -> List[str]:
        """Return the 1-based numbers of the positions scored above 0.5.

        The numbering follows the order of the feature rows produced by
        ``get_Xy`` for this task.
        """
        X, _ = self.get_Xy([task])
        # Column 1 of the prediction matrix is used as the score.
        preds = self.model.predict(X)[:, 1]

        return [str(i + 1) for i, score in enumerate(preds) if score > 0.5]

    def train(self, tasks: List[Task17], save: bool = True):
        """Fit the classifier on ``tasks`` and optionally persist it.

        Args:
            tasks: Tasks to build the training matrix from.
            save: When true, ``self.model.save()`` is called after training.
        """
        X, y = self.get_Xy(tasks)
        self.model.train(X, y)

        if save:
            self.model.save()

    def get_Xy(self, tasks: List[Task17]) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """Build the feature matrix and, when available, the label vector.

        Args:
            tasks: Tasks to convert; each contributes one row per selected
                token position.

        Returns:
            A pair ``(X, y)``. ``X`` holds one token embedding per row.
            ``y`` holds the matching labels, or is ``None`` as soon as a
            task without labels contributes a row.
        """
        X = []
        y = []

        for task in tasks:
            emb = self.bert.eval(task.sentence)
            token_positions = self.__get_token_positions(emb.token_ids, task.mask)
            token_embeddings = emb.get_token_embeddings(token_positions)
            # scores = self.get_comma_scores(task)

            for i, token_embedding in enumerate(token_embeddings):
                X.append(np.concatenate((
                    token_embedding,
                    # np.array(scores[i]),
                )))
                # Once any row lacks a label, labels are dropped for the
                # whole batch and stay dropped.
                if y is not None and task.labels is not None:
                    y.append(task.labels[i])
                else:
                    y = None

        if y is not None:
            y = np.array(y)

        return np.array(X), y

    def __get_token_positions(self, token_ids: np.ndarray, mask: List[bool]) -> List[int]:
        """Return indices of the marker tokens whose ``mask`` entry is true.

        The n-th token with id 128 is paired with ``mask[n]``. Marker tokens
        beyond the length of ``mask`` are ignored.

        Args:
            token_ids: Token ids of the evaluated sentence.
            mask: One flag per marker token, in order of appearance.

        Returns:
            Positions in ``token_ids`` of the marker tokens selected by
            ``mask``.
        """
        mask_num = 0
        token_positions = []

        for i, token_id in enumerate(token_ids):
            # 128 is the token id the mask entries are aligned with —
            # presumably the placeholder token's id in the Bert vocabulary
            # (TODO confirm).
            if token_id != 128:
                continue
            # Check the bound BEFORE indexing: the previous post-increment
            # test (`len(mask) < mask_num`) could never be true, so an extra
            # marker token raised IndexError.
            if mask_num >= len(mask):
                break
            if mask[mask_num]:
                token_positions.append(i)
            mask_num += 1

        return token_positions

    def nth_repl(self, s, sub, repl, nth):
        """Replace the ``nth`` (1-based) occurrence of ``sub`` in ``s``.

        Args:
            s: String to search.
            sub: Substring to look for.
            repl: Replacement text.
            nth: 1-based number of the occurrence to replace.

        Returns:
            ``s`` with that occurrence replaced, or ``s`` unchanged when it
            contains fewer than ``nth`` occurrences.
        """
        find = s.find(sub)
        # i counts the matches found so far (1 if the first search hit).
        i = int(find != -1)
        # Loop until we reach the nth match or run out of matches.
        while find != -1 and i != nth:
            # find + 1 restarts the search just after the last match start.
            find = s.find(sub, find + 1)
            i += 1
        # The counter is also advanced by the final, failed search, so
        # `i == nth` alone is not enough: require a real match position,
        # otherwise the slices below would cut at index -1 and corrupt s.
        if find != -1 and i == nth:
            return s[:find] + repl + s[find + len(sub):]
        return s

    def get_comma_scores(self, task):
        """Score a comma at every selected position of ``task``.

        For each true entry of ``task.mask`` the comma with the same 1-based
        number in ``task.sentence`` is replaced by ``' [MASK] '`` and the
        text is passed to ``self.bert.get_word_in_text_scores`` for ``','``.

        Returns:
            One score per true mask entry, in mask order.
        """
        n = 0
        scores = []

        for mask in task.mask:
            # n is the 1-based comma number and advances for every mask
            # entry, selected or not.
            n += 1
            if not mask:
                continue

            text = self.nth_repl(task.sentence, ',', ' [MASK] ', n)
            score = self.bert.get_word_in_text_scores(text, ',')
            scores.append(score)

        return scores