def __init__(self, bert: Optional[Bert] = None):
    """Store the BERT wrapper and create the 'task17' classifier.

    Args:
        bert: An existing ``Bert`` instance to reuse. When ``None``, a new
            ``Bert()`` is created.
    """
    self.bert = Bert() if bert is None else bert
    self.model = LgbClassifier('task17')
def __init__(self,
             bert: Optional[Bert] = None,
             speller: Optional[Speller] = None):
    """Store the shared helpers, creating any that were not supplied.

    Args:
        bert: ``Bert`` instance to reuse; a new one is built when ``None``.
        speller: ``Speller`` instance to reuse; a new one is built when
            ``None``.
    """
    self.bert: Bert = Bert() if bert is None else bert
    self.speller: Speller = Speller() if speller is None else speller
def __init__(self,
             bert: Optional[Bert] = None,
             sentence_encoder: Optional[SentenceEncoder] = None):
    """Store the shared helpers, creating any that were not supplied.

    Args:
        bert: ``Bert`` instance to reuse; a new one is built when ``None``.
        sentence_encoder: ``SentenceEncoder`` instance to reuse; a new one
            is built when ``None``.
    """
    self.bert: Bert = Bert() if bert is None else bert
    self.sentence_encoder: SentenceEncoder = (
        SentenceEncoder() if sentence_encoder is None else sentence_encoder
    )
def __init__(self,
             bert: Optional[Bert] = None,
             speller: Optional[Speller] = None):
    """Store the shared helpers and create the 'task25' classifier.

    Args:
        bert: ``Bert`` instance to reuse; a new one is built when ``None``.
        speller: ``Speller`` instance to reuse; a new one is built when
            ``None``.
    """
    self.bert: Bert = Bert() if bert is None else bert
    self.speller: Speller = Speller() if speller is None else speller

    # CtbClassifier is the active model; LgbClassifier('task25') was the
    # previously used alternative.
    self.model = CtbClassifier('task25')
def __add_default_solvers(self):
    """Create any missing shared helpers, then register the default solvers.

    Helpers already present on ``self`` are reused. Solvers are built and
    registered one at a time, in a fixed order, via ``self.add_solver``.
    """
    # Helpers are created in dependency order: ``esse`` needs the sentence
    # encoder and ``syntax`` needs the speller.
    if self.bert is None:
        self.bert = Bert()
    if self.speller is None:
        self.speller = Speller()
    if self.stress is None:
        self.stress = Stress()
    if self.sentence_encoder is None:
        self.sentence_encoder = SentenceEncoder()
    if self.esse is None:
        self.esse = EsseLoader(sentence_encoder=self.sentence_encoder)
    if self.ner is None:
        self.ner = NER()
    if self.syntax is None:
        self.syntax = SyntaxParser(self.speller)
    if self.sberbank_solver is None:
        self.sberbank_solver = SberbankSolver()

    # Not registered (left disabled in the original code): ZeroSolver(),
    # DummySolver(), ZeroSberbankSolver(task_types=[Task24]).

    # The Sberbank solver is registered first, as an already-built instance.
    self.add_solver(self.sberbank_solver)

    bert = self.bert
    speller = self.speller
    stress = self.stress
    encoder = self.sentence_encoder
    syntax = self.syntax
    sberbank = self.sberbank_solver

    # (solver class, positional constructor arguments), in registration order.
    default_solvers = (
        (Task01Solver, (bert, encoder)),
        (Task02Solver, (bert, speller)),
        (Task03Solver, (bert, encoder)),
        (Task04Solver, (stress,)),
        (Task05Solver, (bert, speller)),
        (Task06Solver, (bert, speller, sberbank)),
        (Task07Solver, (speller,)),
        (Task08Solver, (bert, speller, syntax)),
        (Task09Solver, (speller, stress)),
        (Task10Solver, (speller,)),
        (Task13Solver, (bert, speller)),
        (Task14Solver, (bert, speller)),
        (Task15Solver, (bert, speller)),
        (Task16Solver, (bert,)),
        (Task17Solver, (bert,)),
        (Task21Solver, (syntax, sberbank)),
        (Task25Solver, (bert, speller)),
        (Task27Solver, (self.esse, self.ner, sberbank, syntax)),
    )
    for solver_cls, ctor_args in default_solvers:
        # Construct and register in the same step so each solver is added
        # before the next one is built.
        self.add_solver(solver_cls(*ctor_args))
def __init__(self,
             bert: Optional[Bert] = None,
             speller: Optional[Speller] = None):
    """Store the shared helpers and load the paronyms mapping from disk.

    Args:
        bert: ``Bert`` instance to reuse; a new one is built when ``None``.
        speller: ``Speller`` instance to reuse; a new one is built when
            ``None``.
    """
    self.bert: Bert = Bert() if bert is None else bert
    self.speller: Speller = Speller() if speller is None else speller

    # The model directory is resolved relative to this source file.
    source_dir = os.path.dirname(__file__)
    model_dir = os.path.abspath(source_dir + '/../../var/model')
    paronyms_path = model_dir + '/paronyms.pickle'
    with open(paronyms_path, 'rb') as f:
        self.paronyms: Dict[str, str] = pickle.load(f)
def __init__(self,
             bert: Optional[Bert] = None,
             speller: Optional[Speller] = None,
             sberbank_solver: Optional[SberbankSolver] = None):
    """Store the shared helpers, load embeddings and index the examples.

    Args:
        bert: ``Bert`` instance to reuse; a new one is built when ``None``.
        speller: ``Speller`` instance to reuse; a new one is built when
            ``None``.
        sberbank_solver: ``SberbankSolver`` instance to reuse; a new one is
            built when ``None``.
    """
    self.bert: Bert = Bert() if bert is None else bert
    self.speller: Speller = Speller() if speller is None else speller
    self.sberbank_solver: SberbankSolver = (
        SberbankSolver() if sberbank_solver is None else sberbank_solver
    )

    self.embs = self.load_pkl('task06')

    # Collect the result of get_pos_pair() for every example text; the
    # second element of each EXAMPLES entry is not used here.
    self.pos_pairs = set()
    self.pos_pairs.update(
        self.get_pos_pair(example_text)
        for example_text, _word_idx in self.EXAMPLES
    )
from flask import Flask, request, jsonify
from lib.util.bert import Bert
from lib.util.sentence_encoder import SentenceEncoder
from lib.solver import SberbankSolver
from lib.ololosh import OloloshAI
from lib.sberbank.utils import rus_tok

# The shared helpers are built once at import time and handed to the AI
# object, so every request reuses the same instances.
bert = Bert()
# One call with a dummy input right after construction.
# NOTE(review): presumably forces model loading before the first request — confirm.
bert.eval('warmup')
sentence_encoder = SentenceEncoder()
sentence_encoder.encode(['warmup'])
sberbank_solver = SberbankSolver()
ai = OloloshAI(bert=bert, sentence_encoder=sentence_encoder, sberbank_solver=sberbank_solver)

app = Flask(__name__)


@app.route('/ready')
def http_ready():
    # Readiness endpoint: always answers with the plain string 'OK'.
    return 'OK'


@app.route('/take_exam', methods=['POST'])
def http_take_exam():
    # Reads the JSON request body, takes its 'tasks' entry and passes it to
    # the AI object.
    # NOTE(review): no return statement is visible in this excerpt; the rest
    # of the handler is likely outside this view — confirm.
    request_data = request.get_json()
    tasks = request_data['tasks']
    answers = ai.take_exam(tasks)
def __init__(self, bert: Optional[Bert] = None):
    """Store the BERT wrapper, creating one when none is supplied.

    Args:
        bert: ``Bert`` instance to reuse; a new one is built when ``None``.
    """
    self.bert: Bert = bert if bert is not None else Bert()
class Task17Solver(BaseSolver):
    """Solver for task type 17 based on BERT token embeddings.

    Candidate positions are the tokens whose id equals ``_COMMA_TOKEN_ID``
    and whose corresponding entry in ``task.mask`` is true. The embedding of
    each candidate token is classified, and candidates scoring above 0.5 are
    reported by their 1-based index among the candidates.
    """

    # Token id that marks a candidate position in the BERT token ids.
    # NOTE(review): presumably the id of ',' in the vocabulary — confirm.
    _COMMA_TOKEN_ID = 128

    def __init__(self, bert: Optional[Bert] = None):
        """Store the BERT wrapper and create the 'task17' classifier.

        Args:
            bert: ``Bert`` instance to reuse; a new one is built when ``None``.
        """
        self.bert = bert if bert is not None else Bert()
        # CtbClassifier('task17') is the alternative model that was tried.
        self.model = LgbClassifier('task17')

    def get_task_type(self) -> str:
        """Return the task type identifier handled by this solver."""
        return Task.TYPE_17

    def solve(self, task: Task17) -> List[str]:
        """Return the 1-based indices of candidates scored above 0.5.

        Indices count only the candidate positions (mask entries that are
        true), in order of appearance, and are returned as strings.
        """
        X, _ = self.get_Xy([task])
        # Column 1 of the prediction matrix is used as the positive score.
        preds = self.model.predict(X)[:, 1]
        return [str(i + 1) for i, score in enumerate(preds) if score > 0.5]

    def train(self, tasks: List[Task17], save: bool = True):
        """Fit the classifier on ``tasks`` and optionally persist it.

        Args:
            tasks: Labelled tasks to build the training matrix from.
            save: When true, call ``self.model.save()`` after training.
        """
        X, y = self.get_Xy(tasks)
        self.model.train(X, y)
        if save:
            self.model.save()

    def get_Xy(self, tasks: List[Task17]) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """Build the feature matrix and, when available, the label vector.

        Returns:
            ``(X, y)`` where ``X`` holds one row per candidate token and
            ``y`` holds the matching labels, or ``None`` as soon as any
            candidate belongs to a task whose ``labels`` is ``None``.
        """
        X = []
        y: Optional[list] = []
        for task in tasks:
            emb = self.bert.eval(task.sentence)
            token_positions = self.__get_token_positions(emb.token_ids, task.mask)
            token_embeddings = emb.get_token_embeddings(token_positions)
            # scores = self.get_comma_scores(task)
            for i, token_embedding in enumerate(token_embeddings):
                # The tuple is the extension point for further feature
                # groups (e.g. np.array(scores[i])).
                X.append(np.concatenate((
                    token_embedding,
                )))
                if y is not None and task.labels is not None:
                    y.append(task.labels[i])
                else:
                    # One unlabelled candidate disables labels for the batch.
                    y = None
        if y is not None:
            y = np.array(y)
        return np.array(X), y

    def __get_token_positions(self, token_ids: np.ndarray, mask: List[bool]) -> List[int]:
        """Return the indices of candidate tokens selected by ``mask``.

        The n-th token with id ``_COMMA_TOKEN_ID`` is paired with
        ``mask[n]``; its index is kept when that entry is true. Scanning
        stops once every mask entry has been consumed, so extra matching
        tokens beyond ``len(mask)`` are ignored instead of raising
        ``IndexError``.
        """
        mask_num = 0
        token_positions = []
        for i, token_id in enumerate(token_ids):
            if token_id != self._COMMA_TOKEN_ID:
                continue
            # Check the bound before indexing: the mask may be shorter than
            # the number of matching tokens (or empty).
            if mask_num >= len(mask):
                break
            if mask[mask_num]:
                token_positions.append(i)
            mask_num += 1
        return token_positions

    def nth_repl(self, s, sub, repl, nth):
        """Replace the ``nth`` (1-based) occurrence of ``sub`` in ``s``.

        Returns ``s`` unchanged when it contains fewer than ``nth``
        occurrences of ``sub``.
        """
        find = s.find(sub)
        # Number of occurrences found so far: 1 if the first search hit.
        i = int(find != -1)
        # Advance until the nth occurrence is reached or the search fails.
        while find != -1 and i != nth:
            # Resume the search just past the start of the previous match.
            find = s.find(sub, find + 1)
            i += 1
        # The counter also reaches ``nth`` on a failed final search, so the
        # match itself must be checked; otherwise the string would be
        # spliced at index -1.
        if find != -1 and i == nth:
            return s[:find] + repl + s[find + len(sub):]
        return s

    def get_comma_scores(self, task):
        """Score ',' at each selected position of ``task.sentence``.

        For every true entry of ``task.mask`` the comma with the same
        1-based ordinal is replaced by ' [MASK] ' and the resulting text is
        scored with ``self.bert.get_word_in_text_scores``.
        """
        scores = []
        for n, mask in enumerate(task.mask, start=1):
            if not mask:
                continue
            text = self.nth_repl(task.sentence, ',', ' [MASK] ', n)
            scores.append(self.bert.get_word_in_text_scores(text, ','))
        return scores