def create_custom_objects(model_info):
    """
    Build the ``custom_objects`` mapping derived from a model's loss settings.

    :param model_info: dict of model metadata; only its ``'loss'`` entry is
        read here. A missing or empty entry yields an empty mapping.
    :return: dict mapping custom object names to the objects themselves.
    """
    custom_objects = {}
    loss = model_info.get('loss') or {}

    # The training code in this file records the loss identifier under
    # 'func', while this loader originally only looked at 'name' (and raised
    # KeyError on the 'func' form). Accept either spelling.
    loss_name = loss.get('name') or loss.get('func')

    if loss_name == 'weighted_categorical_crossentropy':
        # Rebuild the weighted loss from the stored per-label weights.
        custom_objects['loss'] = helper.weighted_categorical_crossentropy(
            np.array(loss['weights']))
    elif loss_name == 'crf':
        custom_objects['CRF'] = CRF
        custom_objects['crf_loss'] = crf_loss
        # A throwaway CRF instance is created only to get hold of its
        # ``accuracy`` attribute.
        # NOTE(review): the unit count 128 looks arbitrary -- confirm.
        custom_objects['crf_viterbi_accuracy'] = CRF(128).accuracy

    return custom_objects
def create_custom_objects(model_info):
    """
    Build the ``custom_objects`` mapping derived from a model's loss and
    embedding settings.

    :param model_info: dict of model metadata; the ``'loss'`` and
        ``'embedding'`` entries are read. Missing or empty entries are
        skipped.
    :return: dict mapping custom object names to the objects themselves.
    """
    custom_objects = {}
    loss = model_info.get('loss') or {}

    # The training code in this file records the loss identifier under
    # 'func', while this loader originally only looked at 'name' (and raised
    # KeyError on the 'func' form). Accept either spelling.
    loss_name = loss.get('name') or loss.get('func')

    if loss_name == 'weighted_categorical_crossentropy':
        # Rebuild the weighted loss from the stored per-label weights.
        custom_objects['loss'] = helper.weighted_categorical_crossentropy(
            np.array(loss['weights']))
    elif loss_name == 'crf':
        custom_objects['CRF'] = CRF
        custom_objects['crf_loss'] = crf_loss
        # A throwaway CRF instance is created only to get hold of its
        # ``accuracy`` attribute.
        # NOTE(review): the unit count 128 looks arbitrary -- confirm.
        custom_objects['crf_viterbi_accuracy'] = CRF(128).accuracy

    embedding = model_info.get('embedding')
    if embedding and embedding['type'] == 'bert':
        custom_objects['NonMaskingLayer'] = helper.NonMaskingLayer
        # Merge in whatever get_bert_custom_objects() returns.
        custom_objects.update(get_bert_custom_objects())

    return custom_objects
def create_custom_objects(model_info):
    """
    Build the ``custom_objects`` mapping for a model described by
    ``model_info``.

    :param model_info: dict of model metadata. The entries read are the
        nested ``'model_info'`` dict (for its ``'loss'`` entry),
        ``'architect_name'`` and ``'embedding'``.
    :return: dict mapping custom object names to the objects themselves.
        ``AttentionWeightedAverage`` and ``KMaxPooling`` are always included.
    """
    custom_objects = {}

    # ``or {}`` also covers the key being present with a None value, which
    # ``.get('model_info', {})`` alone would not.
    loss = (model_info.get('model_info') or {}).get('loss') or {}

    # The training code in this file records the loss identifier under
    # 'func', while this loader originally only looked at 'name' (and raised
    # KeyError on the 'func' form). Accept either spelling.
    loss_name = loss.get('name') or loss.get('func')
    if loss_name == 'weighted_categorical_crossentropy':
        # Rebuild the weighted loss from the stored per-label weights.
        custom_objects['loss'] = helper.weighted_categorical_crossentropy(
            np.array(loss['weights']))

    # CRF objects are selected by architecture name, not by the loss entry.
    architect_name = model_info.get('architect_name')
    if architect_name and 'CRF' in architect_name:
        custom_objects['CRF'] = CRF
        custom_objects['crf_loss'] = crf_loss
        custom_objects['crf_accuracy'] = crf_accuracy

    embedding = model_info.get('embedding')
    if embedding and embedding['embedding_type'] == 'bert':
        custom_objects['NonMaskingLayer'] = helper.NonMaskingLayer
        # Merge in whatever get_bert_custom_objects() returns.
        custom_objects.update(get_bert_custom_objects())

    # Registered unconditionally, whatever the configuration says.
    custom_objects['AttentionWeightedAverage'] = AttentionWeightedAverage
    custom_objects['KMaxPooling'] = KMaxPooling
    return custom_objects
def build_model(
        self,
        x_train: List[List[str]],
        y_train: List[List[str]],
        x_validate: List[List[str]] = None,
        y_validate: List[List[str]] = None,
        labels_weight: bool = None,
        default_labels_weight: float = 50.0,
):
    """
    Prepare the dictionaries and, if no model exists yet, build and compile
    one.

    :param x_train: list of training data.
    :param y_train: list of training target label data; must have the same
        length as ``x_train``.
    :param x_validate: list of validation data.
    :param y_validate: list of validation target label data.
    :param labels_weight: when truthy, record per-label loss weights in
        ``self.model_info['loss']``.
    :param default_labels_weight: weight given to every label other than
        ``k.PAD``, ``k.BOS``, ``k.EOS`` and ``'O'`` (which get weight 1).
    :return: None
    """
    assert len(x_train) == len(y_train)
    self.build_token2id_label2id_dict(x_train, y_train, x_validate, y_validate)

    # An existing model is left untouched.
    if self.model:
        return

    if self.embedding.sequence_length == 0:
        # Pick the sample length found at the 95% position of the sorted
        # training-sample lengths.
        ordered_lengths = sorted(len(sample) for sample in x_train)
        cut = int(0.95 * len(x_train))
        self.embedding.sequence_length = ordered_lengths[cut]
        logging.info('sequence length set to {}'.format(
            self.embedding.sequence_length))

    if labels_weight:
        fixed_weights = {k.PAD: 1, k.BOS: 1, k.EOS: 1, 'O': 1}
        weights = [
            fixed_weights.get(label, default_labels_weight)
            for label in self.label2idx.keys()
        ]
        # NOTE(review): loss_f is built but never used in this method --
        # confirm whether _compile_model() is expected to pick the loss up
        # from self.model_info instead.
        loss_f = helper.weighted_categorical_crossentropy(np.array(weights))
        self.model_info['loss'] = {
            'func': 'weighted_categorical_crossentropy',
            'weights': weights,
        }

    self._prepare_model()
    self._compile_model()
    self.model.summary()
def fit(self,
        x_train: List[List[str]],
        y_train: List[List[str]],
        x_validate: List[List[str]] = None,
        y_validate: List[List[str]] = None,
        batch_size: int = 64,
        epochs: int = 5,
        labels_weight: bool = None,
        default_labels_weight: float = 50.0,
        fit_kwargs: Dict = None,
        **kwargs):
    """
    Train the model on the given data, building it first if necessary.

    :param x_train: list of training data.
    :param y_train: list of training target label data.
    :param x_validate: list of validation data.
    :param y_validate: list of validation target label data.
    :param batch_size: batch size for trainer model; reduced to half the
        training set size (at least 1) when the training set is smaller
        than the requested batch size.
    :param epochs: Number of epochs to train the model.
    :param labels_weight: set class weights for imbalanced classes
    :param default_labels_weight: default weight for labels other than
        ``k.PAD``, ``k.BOS``, ``k.EOS`` and ``'O'``
    :param fit_kwargs: additional kwargs to be passed to
        :func:`~keras.models.Model.fit`; the caller's dict is not modified.
    :param kwargs: accepted for compatibility; not used by this method.
    :return:
    """
    assert len(x_train) == len(y_train)
    self.build_token2id_label2id_dict(x_train, y_train, x_validate, y_validate)

    if len(x_train) < batch_size:
        # Clamp to 1: a single-sample training set would otherwise give a
        # batch size of 0 and a ZeroDivisionError in the step computations.
        batch_size = max(1, len(x_train) // 2)

    if not self.model:
        if self.embedding.sequence_length == 0:
            # Pick the sample length found at the 95% position of the
            # sorted training-sample lengths.
            lengths = sorted(len(x) for x in x_train)
            self.embedding.sequence_length = lengths[int(0.95 * len(x_train))]
            logging.info('sequence length set to {}'.format(
                self.embedding.sequence_length))

        if labels_weight:
            initial_weights = {k.PAD: 1, k.BOS: 1, k.EOS: 1, 'O': 1}
            weights = [initial_weights.get(label, default_labels_weight)
                       for label in self.label2idx.keys()]
            loss_f = helper.weighted_categorical_crossentropy(
                np.array(weights))
            # Record the loss settings in model_info alongside the model.
            self.model_info['loss'] = {
                'func': 'weighted_categorical_crossentropy',
                'weights': weights
            }
            self.build_model(loss_f=loss_f,
                             metrics=['categorical_accuracy', 'acc'])
        else:
            self.build_model()

    train_generator = self.get_data_generator(x_train, y_train, batch_size)

    # Work on a copy so the validation entries added below do not leak into
    # the dict the caller passed in.
    fit_kwargs = dict(fit_kwargs) if fit_kwargs else {}

    if x_validate:
        validation_generator = self.get_data_generator(
            x_validate, y_validate, batch_size)
        fit_kwargs['validation_data'] = validation_generator
        # A validation set smaller than batch_size would floor-divide to
        # zero steps; always run at least one.
        fit_kwargs['validation_steps'] = max(1, len(x_validate) // batch_size)

    self.model.fit_generator(train_generator,
                             steps_per_epoch=len(x_train) // batch_size,
                             epochs=epochs,
                             **fit_kwargs)