def _predict_proba(self, X):
     """
     Compute, for every object in X, a pair (classes, probabilities).

     This is the shared back end of ``predict`` and ``predict_proba``.
     The pairs are expected to be ordered by decreasing probability; the
     ordering itself is produced by ``_extract_word_probs`` and
     ``_prepare_to_joint_classifier``, not by this method.

     Parameters
     ----------
     X : sequence
         Objects to classify; must support ``len`` and integer indexing.

     Returns
     -------
     list
         ``len(X)`` entries, the i-th being the ``(classes, probs)`` pair
         for ``X[i]``.
     """
     answer = [None] * len(X)
     # joint-classifier rows may be sparse; turn one row into a flat dense vector
     row_denser = ((lambda x: np.ravel(x.todense())) if self.sparse else (lambda x: x))
     if self.has_letter_classifiers:
         data_indexes_by_letters = arrange_indexes_by_last_letters(X)
         for letter, indexes in data_indexes_by_letters.items():
             curr_X = [X[i] for i in indexes]
             cls = self.letter_classifiers_.get(letter)
             if cls is None:
                 # No classifier was trained for this letter: every word of the
                 # group gets the same fallback probability row.
                 if letter in self._default_letter_probs:
                     probs_row = self._default_letter_probs[letter]
                 else:
                     probs_row = self.new_letter_probs
                 curr_X_probs = np.tile(probs_row, (len(indexes), 1))
                 curr_classes = range(len(self.classes_))
                 active_classes_number = len(curr_classes)
                 # All words of the group are scored from the fallback row and
                 # none is delegated to a base classifier.  These names must be
                 # (re)bound here: otherwise the loops below fail with NameError
                 # or silently reuse the values left over from the previous letter.
                 train_indexes = range(len(indexes))
                 X_train = None
                 other_indexes, other_probs = (), ()
             else:
                 active_classes_number = cls.active_classes_number
                 (train_indexes, X_train), (other_indexes, other_probs) =\
                     self._prepare_to_joint_classifier(cls, cls.classes_, curr_X,
                                                       active_classes_number=active_classes_number)
                 curr_X_probs = self.joint_classifiers_[letter].predict_proba(X_train)
                 curr_classes = cls.classes_
             # objects whose classes occurred in the training sample
             for i, (train_index, word_probs) in enumerate(zip(train_indexes, curr_X_probs)):
                 index = indexes[train_index]
                 if cls is None:
                     row_ = self._fits_to_which_lemma_fragmentors(curr_X[train_index],
                                                                  negate=True)
                 else:
                     row = row_denser(X_train[i])
                     # collect classes whose column 3*j is zero in the feature row
                     # (presumably the classes the word cannot belong to -- confirm
                     # against _prepare_to_joint_classifier)
                     row_ = [j for j in range(active_classes_number) if row[3*j] == 0.0]
                 indices, probs = self._extract_word_probs(word_probs, row_)
                 # map classifier-local positions to class codes
                 indices = [curr_classes[j] for j in indices]
                 answer[index] = (indices, probs)
             # objects whose classes did not occur in the training sample;
             # their classes are inherited from the base classifier
             for other_index, (indices, probs) in zip(other_indexes, other_probs):
                 index = indexes[other_index]
                 indices = [curr_classes[j] for j in indices]
                 answer[index] = (indices, probs)
     else:
         cls_classes = [i for i, _ in enumerate(self.classes_)]
         active_classes_number = self.paradigm_classifier.active_classes_number
         (train_indexes, X_train), (other_indexes, other_probs) =\
             self._prepare_to_joint_classifier(self.paradigm_classifier, cls_classes, X,
                                               active_classes_number=active_classes_number)
         joint_probs = self.joint_classifier.predict_proba(X_train)
         # objects whose classes occurred in the training sample
         for i, row, word_probs in zip(train_indexes, X_train, joint_probs):
             # TODO: this part still needs to be sorted out (note kept from the original)
             row = row_denser(row)
             row_ = [j for j in range(active_classes_number) if row[3*j] == 0.0]
             answer[i] = self._extract_word_probs(word_probs, row_)
         # objects whose classes did not occur in the training sample;
         # their classes are inherited from the base classifier
         for other_index, (indices, probs) in zip(other_indexes, other_probs):
             indices = [cls_classes[j] for j in indices]
             answer[other_index] = (indices, probs)
     return answer
 def fit(self, X, y, X_dev=None, y_dev=None):
     """
     Fit the paradigm classifier(s) and the joint classifier(s) on (X, y).

     Depending on ``self.has_letter_classifiers`` either one pair of
     classifiers is trained for each group returned by
     ``arrange_indexes_by_last_letters``, or a single pair is trained on
     the whole data set.

     Parameters
     ----------
     X : sequence
         Training objects; must support ``len`` and integer indexing.
     y : sequence
         One collection of class codes per element of X.
     X_dev, y_dev : optional
         Accepted but not used anywhere in this method.

     Returns
     -------
     self

     Raises
     ------
     ValueError
         If X and y differ in length, or if ``self.nfeatures`` is a float
         outside the range [0.0, 1.0].
     """
     if len(X) != len(y):
         raise ValueError("Data and labels should have equal length")
     self._prepare_parameters()
     self._prepare_classifiers()
     if isinstance(self.nfeatures, float):
         if self.nfeatures < 0.0 or self.nfeatures > 1.0:
             raise ValueError("If nfeatures is float, it should be from 0.0 to 1.0")
     # choose whether classification is performed separately for each letter
     if self.has_letter_classifiers:
         # classes_: sorted unique labels of y; Y_new: index of each label in classes_
         # NOTE(review): Y_new[i] is used below as one index per object, which
         # assumes every y[i] holds exactly one label -- confirm with callers
         self.classes_, Y_new = np.unique(y, return_inverse=True)
         # add the classes from paradigms_list that did not occur in y
         classes_set, self.classes_  = set(self.classes_), list(self.classes_)
         for code in self.paradigms_list.values():
             if code not in classes_set:
                 self.classes_.append(code)
         self.classes_ = np.array(self.classes_)
         # recode the classes: paradigm description -> position in self.classes_
         recoded_paradigm_table = {self.descrs_by_classes[label]: i
                                   for i, label in enumerate(self.classes_)}
         self.paradigm_classifier.set_params(paradigm_table=recoded_paradigm_table)
         data_indexes_by_letters =\
             arrange_indexes_by_last_letters(X, [len(labels) for labels in y])
         X_by_letters, y_by_letters = dict(), dict()
         single_class_letters = dict()
         for letter, indexes in data_indexes_by_letters.items():
             X_curr, y_curr = [X[i] for i in indexes], [Y_new[i] for i in indexes]
             # a letter gets its own classifier only when its objects carry
             # at least two different class indexes
             if min(y_curr) < max(y_curr):
                 X_by_letters[letter] = X_curr
                 y_by_letters[letter] = [[label] for label in y_curr]
             else:
                 # a single class for this letter: just remember it
                 single_class_letters[letter] = y_curr[0]
         # fresh copies of the template classifiers, one per letter kept above
         self.letter_classifiers_ = {letter: clone(self.paradigm_classifier)
                                     for letter in X_by_letters}
         self.joint_classifiers_ =  {letter: clone(self.joint_classifier)
                                     for letter in X_by_letters}
         # determine the probabilities for letters that have no classifier
         self._make_new_letter_probs(y)
         self._make_default_letter_probs(single_class_letters)
         for letter, X_curr in X_by_letters.items():
             self.letter_classifiers_[letter].fit(X_curr, y_by_letters[letter])
     else:
         self.paradigm_classifier.set_params(paradigm_table = self.paradigms_list)
         self.paradigm_classifier.fit(X, y)
         # self.paradigm_classifier already contained all the classes,
         # so nothing has to be added
         self.classes_ = self.paradigm_classifier.classes_
         self.active_classes_number = self.paradigm_classifier.active_classes_number
     # paradigm handlers, one per class in self.classes_
     self.paradigmers = [ParadigmSubstitutor(self.descrs_by_classes[label])
                         for label in self.classes_]
     # lemma handlers
     self._prepare_lemma_fragmentors()
     # grammeme probabilities: one zero-initialised vector per paradigm
     self.form_probabilities_for_paradigms =\
         [np.zeros(shape=(paradigmer.unique_forms_number(return_principal=False),),
                   dtype=np.float64) for paradigmer in self.paradigmers]
     # class code -> position in self.classes_
     self.reverse_classes = {label: i for (i, label) in enumerate(self.classes_)}
     self._fit_probabilities(X, [[self.reverse_classes[code] for code in labels]
                                 for labels in y])
     # train the joint classifier(s) on the data built by _prepare_to_joint_classifier
     if self.has_letter_classifiers:
         for letter, X_curr in X_by_letters.items():
             cls = self.letter_classifiers_[letter]
             # only the first active_classes_number classes are passed on
             cls_classes = cls.classes_[:cls.active_classes_number]
             (_, X_joint, y_joint), _ = self._prepare_to_joint_classifier(
                 cls, cls_classes, X_curr, y_by_letters[letter])
             self.joint_classifiers_[letter].fit(X_joint, y_joint)
     else:
         cls = self.paradigm_classifier
         cls_classes = list(range(cls.active_classes_number))
         (_, X_joint, y_joint), _ =\
             self._prepare_to_joint_classifier(cls, cls_classes, X, y)
         self.joint_classifier.fit(X_joint, y_joint)
     return self