Example #1
    # Imports assumed by this snippet; CRF and crf_loss come from keras-contrib,
    # while `helper` is the surrounding project's utility module (not shown here).
    import numpy as np
    from keras_contrib.layers import CRF
    from keras_contrib.losses import crf_loss

    def create_custom_objects(model_info):
        """Rebuild the custom_objects dict needed to deserialize a saved model."""
        custom_objects = {}
        loss = model_info.get('loss')
        if loss and loss['name'] == 'weighted_categorical_crossentropy':
            # Recreate the weighted loss from the class weights stored with the model.
            loss_f = helper.weighted_categorical_crossentropy(
                np.array(loss['weights']))
            custom_objects['loss'] = loss_f

        if loss and loss['name'] == 'crf':
            # Register the keras-contrib CRF layer, its loss and its Viterbi
            # accuracy metric; the unit count passed to CRF() does not matter here.
            custom_objects['CRF'] = CRF
            custom_objects['crf_loss'] = crf_loss
            custom_objects['crf_viterbi_accuracy'] = CRF(128).accuracy

        return custom_objects
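A dict like the one returned above is what gets passed to Keras when reloading a saved model, so the custom loss and CRF objects can be resolved by name. A minimal usage sketch, assuming a saved model file and a stored `model_info` payload of the shape the function reads (both are hypothetical here):

    from keras.models import load_model

    model_info = {'loss': {'name': 'crf'}}  # assumed payload shape
    custom_objects = create_custom_objects(model_info)
    # custom_objects now maps 'CRF', 'crf_loss' and 'crf_viterbi_accuracy' to
    # the objects Keras needs while deserializing the architecture.
    model = load_model('saved_model.h5', custom_objects=custom_objects)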
Example #2
    def create_custom_objects(model_info):
        custom_objects = {}
        loss = model_info.get('loss')
        if loss and loss['name'] == 'weighted_categorical_crossentropy':
            loss_f = helper.weighted_categorical_crossentropy(
                np.array(loss['weights']))
            custom_objects['loss'] = loss_f

        if loss and loss['name'] == 'crf':
            custom_objects['CRF'] = CRF
            custom_objects['crf_loss'] = crf_loss
            custom_objects['crf_viterbi_accuracy'] = CRF(128).accuracy

        embedding = model_info.get('embedding')

        if embedding and embedding['type'] == 'bert':
            # BERT-based models additionally need the project's NonMaskingLayer
            # plus the custom objects used by the underlying BERT implementation.
            custom_objects['NonMaskingLayer'] = helper.NonMaskingLayer
            custom_objects.update(get_bert_custom_objects())

        return custom_objects
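As a rough illustration of which branches fire (the payload layout below is an assumption based on the keys the function reads), a model saved with a BERT embedding and a weighted loss would yield both the rebuilt loss and the BERT-specific objects:

    # Hypothetical model_info payload, only to show which branches are taken.
    model_info = {
        'loss': {'name': 'weighted_categorical_crossentropy',
                 'weights': [1.0, 1.0, 1.0, 1.0, 50.0, 50.0]},
        'embedding': {'type': 'bert'},
    }
    custom_objects = create_custom_objects(model_info)
    # -> contains 'loss', 'NonMaskingLayer', plus everything from
    #    get_bert_custom_objects()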
Example #3
    def create_custom_objects(model_info):
        custom_objects = {}
        # This variant reads the loss settings from a nested 'model_info' block.
        loss = model_info.get('model_info', {}).get('loss')
        if loss and loss['name'] == 'weighted_categorical_crossentropy':
            loss_f = helper.weighted_categorical_crossentropy(np.array(loss['weights']))
            custom_objects['loss'] = loss_f

        # CRF objects are registered based on the architecture name rather than
        # the loss name, and crf_accuracy is used instead of the Viterbi metric.
        architect_name = model_info.get('architect_name')
        if architect_name and 'CRF' in architect_name:
            custom_objects['CRF'] = CRF
            custom_objects['crf_loss'] = crf_loss
            custom_objects['crf_accuracy'] = crf_accuracy

        embedding = model_info.get('embedding')

        if embedding and embedding['embedding_type'] == 'bert':
            custom_objects['NonMaskingLayer'] = helper.NonMaskingLayer
            custom_objects.update(get_bert_custom_objects())

        # These two custom layers are always registered, whatever the model.
        custom_objects['AttentionWeightedAverage'] = AttentionWeightedAverage
        custom_objects['KMaxPooling'] = KMaxPooling
        return custom_objects
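This variant expects the loss description one level deeper and uses different key names. A sketch of the assumed structure follows; the architecture name is a made-up example, and any name containing 'CRF' triggers the CRF branch:

    # Hypothetical payload for this variant; note the extra 'model_info' nesting
    # and the 'architect_name' / 'embedding_type' keys.
    model_info = {
        'model_info': {'loss': {'name': 'weighted_categorical_crossentropy',
                                'weights': [1.0, 50.0]}},
        'architect_name': 'BLSTMCRFModel',
        'embedding': {'embedding_type': 'bert'},
    }
    custom_objects = create_custom_objects(model_info)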
Example #4
    def build_model(
        self,
        x_train: List[List[str]],
        y_train: List[List[str]],
        x_validate: List[List[str]] = None,
        y_validate: List[List[str]] = None,
        labels_weight: bool = None,
        default_labels_weight: float = 50.0,
    ):
        assert len(x_train) == len(y_train)
        self.build_token2id_label2id_dict(x_train, y_train, x_validate,
                                          y_validate)

        if not self.model:
            if self.embedding.sequence_length == 0:
                # Pick the sequence length that covers roughly 95% of the
                # training samples (the 95th percentile of sentence lengths).
                self.embedding.sequence_length = sorted(
                    [len(x) for x in x_train])[int(0.95 * len(x_train))]
                logging.info('sequence length set to {}'.format(
                    self.embedding.sequence_length))

            if labels_weight:
                # Padding/boundary tokens and the 'O' tag keep weight 1; every
                # other label gets default_labels_weight to counter class imbalance.
                weights = []
                initial_weights = {k.PAD: 1, k.BOS: 1, k.EOS: 1, 'O': 1}
                for label in self.label2idx.keys():
                    weights.append(
                        initial_weights.get(label, default_labels_weight))
                loss_f = helper.weighted_categorical_crossentropy(
                    np.array(weights))
                # Record the loss configuration so it can be rebuilt when the
                # saved model is reloaded.
                self.model_info['loss'] = {
                    'func': 'weighted_categorical_crossentropy',
                    'weights': weights
                }

        self._prepare_model()
        self._compile_model()
        self.model.summary()
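The sequence-length heuristic in `build_model` is easy to check in isolation; a standalone sketch with made-up data:

    # Take the sentence length at roughly the 95th percentile of the training set.
    x_train = [['EU', 'rejects', 'German', 'call'],
               ['Peter', 'Blackburn'],
               ['The', 'European', 'Commission', 'said', 'on', 'Thursday']]
    lengths = sorted(len(x) for x in x_train)
    sequence_length = lengths[int(0.95 * len(lengths))]
    print(sequence_length)  # -> 6 for this tiny sample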
Example #5
    def fit(self,
            x_train: List[List[str]],
            y_train: List[List[str]],
            x_validate: List[List[str]] = None,
            y_validate: List[List[str]] = None,
            batch_size: int = 64,
            epochs: int = 5,
            labels_weight: bool = None,
            default_labels_weight: float = 50.0,
            fit_kwargs: Dict = None,
            **kwargs):
        """

        :param x_train: list of training data.
        :param y_train: list of training target label data.
        :param batch_size: batch size for trainer model
        :param epochs: Number of epochs to train the model.
        :param x_validate: list of validation data.
        :param y_validate: list of validation target label data.
        :param y_validate: list of validation target label data.
        :param y_validate: list of validation target label data.
        :param labels_weight: set class weights for imbalanced classes
        :param default_labels_weight: default weight for labels not in labels_weight dict
        :param fit_kwargs: additional kwargs to be passed to
               :func:`~keras.models.Model.fit`
        :return:
        """
        assert len(x_train) == len(y_train)
        self.build_token2id_label2id_dict(x_train, y_train, x_validate,
                                          y_validate)

        # Shrink the batch size when the training set is smaller than one batch.
        if len(x_train) < batch_size:
            batch_size = len(x_train) // 2

        if not self.model:
            if self.embedding.sequence_length == 0:
                self.embedding.sequence_length = sorted(
                    [len(x) for x in x_train])[int(0.95 * len(x_train))]
                logging.info('sequence length set to {}'.format(
                    self.embedding.sequence_length))

            if labels_weight:
                weights = []
                initial_weights = {k.PAD: 1, k.BOS: 1, k.EOS: 1, 'O': 1}
                for label in self.label2idx.keys():
                    weights.append(
                        initial_weights.get(label, default_labels_weight))
                loss_f = helper.weighted_categorical_crossentropy(
                    np.array(weights))
                self.model_info['loss'] = {
                    'func': 'weighted_categorical_crossentropy',
                    'weights': weights
                }

                self.build_model(loss_f=loss_f,
                                 metrics=['categorical_accuracy', 'acc'])
            else:
                self.build_model()

        train_generator = self.get_data_generator(x_train, y_train, batch_size)

        if fit_kwargs is None:
            fit_kwargs = {}

        if x_validate:
            validation_generator = self.get_data_generator(
                x_validate, y_validate, batch_size)

            fit_kwargs['validation_data'] = validation_generator
            fit_kwargs['validation_steps'] = len(x_validate) // batch_size

        self.model.fit_generator(train_generator,
                                 steps_per_epoch=len(x_train) // batch_size,
                                 epochs=epochs,
                                 **fit_kwargs)
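To make the `labels_weight` branch concrete, here is a small standalone sketch of how the per-class weight vector is assembled; the label set is hypothetical and plain strings stand in for `k.PAD`, `k.BOS` and `k.EOS`:

    # Special tokens and the 'O' tag keep weight 1; every other label falls
    # back to default_labels_weight, which up-weights the rare entity tags.
    label2idx = {'<PAD>': 0, '<BOS>': 1, '<EOS>': 2, 'O': 3, 'B-PER': 4, 'I-PER': 5}
    default_labels_weight = 50.0
    initial_weights = {'<PAD>': 1, '<BOS>': 1, '<EOS>': 1, 'O': 1}
    weights = [initial_weights.get(label, default_labels_weight)
               for label in label2idx]
    print(weights)  # -> [1, 1, 1, 1, 50.0, 50.0]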