Example No. 1
    def __init__(
        self,
        base_model: tf.keras.Model,
        units: int,
        chain_initializer: types.Initializer = "orthogonal",
        use_boundary: bool = True,
        boundary_initializer: types.Initializer = "zeros",
        use_kernel: bool = True,
        **kwargs,
    ):
        super().__init__()

        # lazy import to work around a circular import issue:
        # tfa.layers.CRF -> tfa.text.__init__ -> tfa.text.crf_wrapper -> tfa.layers.CRF
        from tensorflow_addons.layers.crf import CRF  # noqa

        self.crf_layer = CRF(
            units=units,
            chain_initializer=chain_initializer,
            use_boundary=use_boundary,
            boundary_initializer=boundary_initializer,
            use_kernel=use_kernel,
            **kwargs,
        )

        self.base_model = base_model
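
This constructor only wires up the CRF head and stores the feature-extracting base model. A minimal sketch of instantiating such a wrapper (assuming the enclosing class is the CRFModelWrapper of Example No. 10, and using an illustrative base model):

import tensorflow as tf

# Hypothetical base model: embeds token ids into per-step features.
base = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=1000, output_dim=32, mask_zero=True),
])

# `units` must match the size of the tag set the CRF decodes over.
wrapper = CRFModelWrapper(base_model=base, units=9)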
Example No. 2
def get_test_data_extended():
    logits = np.array([
        [[0, 0, 0.5, 0.5, 0.2], [0, 0, 0.3, 0.3, 0.1], [0, 0, 0.9, 10, 1]],
        [[0, 0, 0.2, 0.5, 0.2], [0, 0, 3, 0.3, 0.1], [0, 0, 0.9, 1, 1]],
    ])
    tags = np.array([[2, 3, 4], [3, 2, 2]])

    transitions = np.array([
        [0.1, 0.2, 0.3, 0.4, 0.5],
        [0.8, 0.3, 0.1, 0.7, 0.9],
        [-0.3, 2.1, -5.6, 3.4, 4.0],
        [0.2, 0.4, 0.6, -0.3, -0.4],
        [1.0, 1.0, 1.0, 1.0, 1.0],
    ])

    boundary_values = np.ones((5, ))
    crf_layer = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=tf.keras.initializers.Constant(transitions),
        use_boundary=True,
        boundary_initializer=tf.keras.initializers.Constant(boundary_values),
        name="crf_layer",
    )
    return logits, tags, transitions, boundary_values, crf_layer
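
A sketch of consuming this fixture (the variable names are illustrative; the four-output contract of the tfa CRF layer is the one exercised in Example No. 7):

logits, tags, transitions, boundary_values, crf_layer = get_test_data_extended()

# The layer returns the Viterbi-decoded tags plus its internals:
# unary potentials, per-example sequence lengths, and the chain kernel.
decoded, potentials, seq_lens, kernel = crf_layer(logits)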
Example No. 3
    def __init__(self, vocab_size, embedding_size, hidden_size, tag_size,
                 *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_size)
        self.bi_lstm = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(hidden_size, return_sequences=True),
            merge_mode="concat",
        )
        self.dense = tf.keras.layers.Dense(tag_size)
        self.crf = CRF(tag_size)
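
The snippet above only declares the layers; the model's `call` is not shown. A plausible sketch (an assumption, not the original code) that chains them is:

    def call(self, inputs, training=None):
        x = self.embedding(inputs)
        x = self.bi_lstm(x, training=training)
        x = self.dense(x)
        # The tfa CRF consumes per-step tag scores and returns the decoded
        # sequence first, followed by its internals.
        return self.crf(x)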
Example No. 4
    def __init__(self, words_count, labels_count, max_length):
        super(BiLSTMCRF, self).__init__()

        self.embedding = Embedding(words_count,
                                   20,
                                   input_length=max_length,
                                   mask_zero=True)
        self.lstm = Bidirectional(
            LSTM(50, recurrent_dropout=0.1, return_sequences=True))
        self.dense = TimeDistributed(Dense(50, activation="relu"))
        self.crf = CRF(labels_count)
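
Here too only `__init__` is shown. A hedged sketch of the matching `call` (hypothetical, following the layer order declared above):

    def call(self, inputs, training=None):
        x = self.embedding(inputs)          # masks zero-padded positions
        x = self.lstm(x, training=training)
        x = self.dense(x)
        return self.crf(x)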
Example No. 5
def get_some_model(x_np, y_np, sanity_check=True):
    x_input = tf.keras.layers.Input(shape=x_np.shape[1:])
    crf_outputs = CRF(5, name="L")(x_input)
    base_model = tf.keras.Model(x_input, crf_outputs)

    model = ModelWithCRFLoss(base_model)

    model.compile("adam")
    if sanity_check:
        model.fit(x=x_np, y=y_np)
        model.evaluate(x_np, y_np)
    model.predict(x_np)
    return model
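
A quick sketch of exercising this helper with toy data (shapes are illustrative: the CRF expects (batch, timesteps, units) float inputs and (batch, timesteps) integer tags):

import numpy as np

x_np = np.random.uniform(size=(2, 3, 5)).astype(np.float32)
y_np = np.random.randint(0, 5, size=(2, 3))
model = get_some_model(x_np, y_np, sanity_check=True)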
Example No. 6
    def __init__(self, params: BaseParams, problem_name: str):
        super(SequenceLabel, self).__init__(name=problem_name)
        self.params = params
        self.problem_name = problem_name
        num_classes = self.params.num_classes[self.problem_name]
        self.dense = tf.keras.layers.Dense(num_classes, activation=None)

        self.dropout = tf.keras.layers.Dropout(1 - params.dropout_keep_prob)

        if self.params.crf:
            self.crf = CRF(num_classes)
            self.metric_fn = tf.keras.metrics.Accuracy(
                name='{}_acc'.format(self.problem_name))
        else:
            self.metric_fn = tf.keras.metrics.SparseCategoricalAccuracy(
                name='{}_acc'.format(self.problem_name))
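
The matching `call` is not part of the snippet; a sketch consistent with the branches above (hypothetical, names assumed) would be:

    def call(self, inputs, training=None):
        hidden = self.dropout(inputs, training=training)
        logits = self.dense(hidden)
        if self.params.crf:
            # Viterbi decode; tfa's CRF also returns potentials,
            # sequence lengths, and the chain kernel.
            decoded, *_ = self.crf(logits)
            return decoded
        return logits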
Example No. 7
def test_unmasked_viterbi_decode():
    x_np, y_np = get_test_data()

    transitions = np.ones([5, 5])
    boundary_value = np.ones(5)

    layer = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=tf.keras.initializers.Constant(transitions),
        use_boundary=True,
        boundary_initializer=tf.keras.initializers.Constant(boundary_value),
    )

    decoded_sequence, _, _, _ = layer(x_np)
    decoded_sequence = decoded_sequence.numpy()
    np.testing.assert_equal(decoded_sequence, y_np)
    assert decoded_sequence.dtype == np.int32
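
With every transition and boundary score set to the same constant, all paths share the same transition contribution, so the Viterbi decode degenerates to a per-step argmax over the logits. A hypothetical, shape-compatible stand-in for the `get_test_data` fixture (the real one is not shown) would therefore be:

import numpy as np

def get_test_data():
    x_np = np.random.uniform(size=(2, 3, 5)).astype(np.float32)
    y_np = x_np.argmax(axis=-1)  # uniform transitions => per-step argmax
    return x_np, y_np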
Example No. 8
def test_mask_left_padding():
    x_np, y_np = get_test_data()
    mask = np.array([[0, 1, 1], [1, 1, 1]])

    x = tf.keras.layers.Input(shape=x_np.shape[1:])
    crf_layer_outputs = CRF(5)(x, mask=tf.constant(mask))

    base_model = tf.keras.Model(x, crf_layer_outputs)
    model = ModelWithCRFLoss(base_model)

    # We can only check the value of the mask when running eagerly;
    # it is essentially a debug mode, since otherwise we would be
    # wasting computation.
    model.compile("adam", run_eagerly=True)

    with pytest.raises(NotImplementedError) as context:
        model(x_np).numpy()

    assert "CRF layer do not support left padding" in str(context.value)
Example No. 9
def test_mask_right_padding():
    x_np, y_np = get_test_data()
    mask = np.array([[1, 1, 1], [1, 1, 0]])

    x = tf.keras.layers.Input(shape=x_np.shape[1:])

    crf_layer_outputs = CRF(5)(x, mask=tf.constant(mask))

    base_model = tf.keras.Model(x, crf_layer_outputs)
    model = ModelWithCRFLoss(base_model)

    # check shape inference
    model.compile("adam")
    old_weights = model.get_weights()
    model.fit(x_np, y_np)
    new_weights = model.get_weights()

    # we check that the weights were updated during the training phase.
    with pytest.raises(AssertionError):
        assert_all_equal(old_weights, new_weights)

    model.predict(x_np)
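
The `assert_all_equal` helper is not shown; a plausible implementation (an assumption) compares the weight arrays pairwise:

import numpy as np

def assert_all_equal(array_list1, array_list2):
    for a, b in zip(array_list1, array_list2):
        np.testing.assert_equal(np.asarray(a), np.asarray(b))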
Example No. 10
import tensorflow as tf
from tensorflow_addons.text.crf import crf_log_likelihood
from tensorflow_addons.utils import types


class CRFModelWrapper(tf.keras.Model):
    def __init__(
        self,
        base_model: tf.keras.Model,
        units: int,
        chain_initializer: types.Initializer = "orthogonal",
        use_boundary: bool = True,
        boundary_initializer: types.Initializer = "zeros",
        use_kernel: bool = True,
        **kwargs,
    ):
        super().__init__()

        # lazy import to work around a circular import issue:
        # tfa.layers.CRF -> tfa.text.__init__ -> tfa.text.crf_wrapper -> tfa.layers.CRF
        from tensorflow_addons.layers.crf import CRF  # noqa

        self.crf_layer = CRF(
            units=units,
            chain_initializer=chain_initializer,
            use_boundary=use_boundary,
            boundary_initializer=boundary_initializer,
            use_kernel=use_kernel,
            **kwargs,
        )

        self.base_model = base_model

    def unpack_training_data(self, data):
        # Override this method if it does not suit your task.
        if len(data) == 3:
            x, y, sample_weight = data
        else:
            x, y = data
            sample_weight = None
        return x, y, sample_weight

    def call(self,
             inputs,
             training=None,
             mask=None,
             return_crf_internal=False):
        base_model_outputs = self.base_model(
            inputs, training=training, mask=mask)

        # Change the next line if your base model has more outputs.
        crf_input = base_model_outputs

        decode_sequence, potentials, sequence_length, kernel = self.crf_layer(
            crf_input)

        # Change the next line if your base model has more outputs.
        # Always keep `(potentials, sequence_length, kernel)` and
        # `decode_sequence` as the first two outputs of the model;
        # the current `self.train_step()` expects this layout.
        outputs = (potentials, sequence_length, kernel), decode_sequence

        if return_crf_internal:
            return outputs
        else:
            # outputs[0] holds the CRF internals, so skip it
            output_without_crf_internal = outputs[1:]

            # it is nicer to return a tensor than a one-element tuple
            if len(output_without_crf_internal) == 1:
                return output_without_crf_internal[0]
            else:
                return output_without_crf_internal

    def compute_crf_loss(self,
                         potentials,
                         sequence_length,
                         kernel,
                         y,
                         sample_weight=None):
        crf_likelihood, _ = crf_log_likelihood(potentials, y, sequence_length,
                                               kernel)
        # convert likelihood to loss
        flat_crf_loss = -1 * crf_likelihood
        if sample_weight is not None:
            flat_crf_loss = flat_crf_loss * sample_weight
        crf_loss = tf.reduce_mean(flat_crf_loss)

        return crf_loss

    def train_step(self, data):
        x, y, sample_weight = self.unpack_training_data(data)
        with tf.GradientTape() as tape:
            (potentials, sequence_length,
             kernel), decoded_sequence, *_ = self(x,
                                                  training=True,
                                                  return_crf_internal=True)
            crf_loss = self.compute_crf_loss(potentials, sequence_length,
                                             kernel, y, sample_weight)
            loss = crf_loss + tf.reduce_sum(self.losses)
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients,
                                           self.trainable_variables))

        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, decoded_sequence)
        # Return a dict mapping metric names to current value
        orig_results = {m.name: m.result() for m in self.metrics}
        crf_results = {"loss": loss, "crf_loss": crf_loss}
        return {**orig_results, **crf_results}

    def test_step(self, data):
        x, y, sample_weight = self.unpack_training_data(data)
        (potentials, sequence_length,
         kernel), decode_sequence, *_ = self(x,
                                             training=False,
                                             return_crf_internal=True)
        crf_loss = self.compute_crf_loss(potentials, sequence_length, kernel,
                                         y, sample_weight)
        loss = crf_loss + tf.reduce_sum(self.losses)
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, decode_sequence)
        # Return a dict mapping metric names to current value
        results = {m.name: m.result() for m in self.metrics}
        results.update({"loss": loss, "crf_loss": crf_loss})  # append loss
        return results

    def get_config(self):
        base_model_config = self.base_model.get_config()
        crf_config = self.crf_layer.get_config()

        return {**{"base_model": base_model_config}, **crf_config}

    @classmethod
    def from_config(cls, config):
        base_model_config = config.pop("base_model")
        base_model = tf.keras.Model.from_config(base_model_config)

        return cls(base_model=base_model, **config)
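
A minimal end-to-end sketch of using the wrapper (data shapes and the base model are illustrative assumptions):

import numpy as np
import tensorflow as tf

NUM_TAGS = 5

# Hypothetical base model producing per-step features.
token_ids = tf.keras.layers.Input(shape=(10,), dtype=tf.int32)
features = tf.keras.layers.Embedding(100, 16, mask_zero=True)(token_ids)
base = tf.keras.Model(token_ids, features)

model = CRFModelWrapper(base, units=NUM_TAGS)
model.compile(optimizer="adam")

x = np.random.randint(1, 100, size=(8, 10))
y = np.random.randint(0, NUM_TAGS, size=(8, 10))
model.fit(x, y, epochs=1)
model.predict(x)  # decoded tag sequences, shape (8, 10)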