Esempio n. 1
0
def test_activation_coverage(act_type):
    """Check that an activation-based coverage update keeps the coverage shape.

    The coverage update symbol is built from symbolic inputs, bound on CPU,
    fed with randomly generated inputs and run forward once. Only the shape of
    the resulting coverage is asserted.

    :param act_type: Activation type. Passed to ``sockeye.coverage.get_coverage``
        as ``coverage_type`` and to ``mx.sym.Activation`` as ``act_type``.
    """
    batch_size, source_seq_len = 4, 10
    encoder_num_hidden, decoder_num_hidden, coverage_num_hidden = 5, 5, 2

    # (argument name, shape) pairs, listed in the order the inputs are created and fed.
    input_shapes = (
        ("source", (batch_size, source_seq_len, encoder_num_hidden)),
        ("source_length", (batch_size, )),
        ("prev_hidden", (batch_size, decoder_num_hidden)),
        ("prev_coverage", (batch_size, source_seq_len, coverage_num_hidden)),
        # This test feeds the attention scores with a trailing singleton axis.
        ("attention_scores", (batch_size, source_seq_len, 1)),
    )
    shape_of = dict(input_shapes)
    sym = {name: mx.sym.Variable(name) for name, _ in input_shapes}

    # Random input values; the helpers are called in a fixed order.
    feed = [
        ("source", gaussian_vector(shape=shape_of["source"])),
        ("source_length", integer_vector(shape=shape_of["source_length"],
                                         max_value=source_seq_len)),
        ("prev_hidden", gaussian_vector(shape=shape_of["prev_hidden"])),
        ("prev_coverage", gaussian_vector(shape=shape_of["prev_coverage"])),
    ]
    raw_scores = uniform_vector(shape=shape_of["attention_scores"])
    # NOTE(review): np.sum has no axis here, so the scores are scaled by their
    # total over the whole array rather than per sentence - confirm intended.
    feed.append(("attention_scores", raw_scores / np.sum(raw_scores)))

    coverage = sockeye.coverage.get_coverage(
        coverage_type=act_type, coverage_num_hidden=coverage_num_hidden)
    update_fn = coverage.on(sym["source"], sym["source_length"], source_seq_len)
    updated = update_fn(sym["prev_hidden"], sym["attention_scores"],
                        sym["prev_coverage"])

    executor = updated.simple_bind(ctx=mx.cpu(), **shape_of)
    for name, values in feed:
        executor.arg_dict[name][:] = values

    outputs = executor.forward()

    # Value of the activation for a zero input; it depends on the activation type.
    # It is evaluated here but not compared against anything in this test.
    zero_activation = mx.sym.Activation(name="activation", act_type=act_type)
    modulated = zero_activation.eval(
        ctx=mx.cpu(), activation_data=mx.nd.zeros((1, )))[0].asnumpy()

    new_coverage = outputs[0].asnumpy()

    assert new_coverage.shape == shape_of["prev_coverage"]
Esempio n. 2
0
def _test_activation_coverage(act_type):
    """Run one forward pass of an activation-based coverage model and check its output.

    A ``CoverageConfig`` is built for ``act_type``, the coverage update symbol is
    bound on CPU and fed with randomly generated inputs. The test asserts the
    output shape and a per-sentence count derived from the activation of zero.

    :param act_type: Activation type. Used as the coverage ``type`` and as the
        ``act_type`` of ``mx.nd.Activation``.
    """
    config_coverage = sockeye.coverage.CoverageConfig(
        type=act_type,
        max_fertility=2,
        num_hidden=2,
        layer_normalization=False)
    batch_size, source_seq_len = 4, 10
    encoder_num_hidden, decoder_num_hidden = 5, 5

    # (argument name, shape) pairs, listed in the order the inputs are created and fed.
    input_shapes = (
        ("source", (batch_size, source_seq_len, encoder_num_hidden)),
        ("source_length", (batch_size, )),
        ("prev_hidden", (batch_size, decoder_num_hidden)),
        ("prev_coverage", (batch_size, source_seq_len,
                           config_coverage.num_hidden)),
        ("attention_scores", (batch_size, source_seq_len)),
    )
    shape_of = dict(input_shapes)
    sym = {name: mx.sym.Variable(name) for name, _ in input_shapes}

    # Random input values; the helpers are called in a fixed order.
    source_values = gaussian_vector(shape=shape_of["source"])
    lengths = integer_vector(shape=shape_of["source_length"],
                             max_value=source_seq_len)
    hidden_values = gaussian_vector(shape=shape_of["prev_hidden"])
    coverage_values = gaussian_vector(shape=shape_of["prev_coverage"])
    raw_scores = uniform_vector(shape=shape_of["attention_scores"])
    # NOTE(review): np.sum has no axis here, so the scores are scaled by their
    # total over the whole array rather than per sentence - confirm intended.
    score_values = raw_scores / np.sum(raw_scores)

    coverage = sockeye.coverage.get_coverage(config_coverage)
    update_fn = coverage.on(sym["source"], sym["source_length"], source_seq_len)
    updated = update_fn(sym["prev_hidden"], sym["attention_scores"],
                        sym["prev_coverage"])

    executor = updated.simple_bind(ctx=mx.cpu(), **shape_of)
    feed = (
        ("source", source_values),
        ("source_length", lengths),
        ("prev_hidden", hidden_values),
        ("prev_coverage", coverage_values),
        ("attention_scores", score_values),
    )
    for name, values in feed:
        executor.arg_dict[name][:] = values

    new_coverage = executor.forward()[0].asnumpy()
    assert new_coverage.shape == shape_of["prev_coverage"]

    # Value of the activation for a zero input; it depends on the activation type.
    modulated = mx.nd.Activation(mx.nd.zeros((1, 1)),
                                 act_type=act_type).asnumpy()
    # Mark every (sentence, position) where at least one coverage unit is
    # within 1e-6 of that value, then count the marked positions per sentence.
    near_modulated = np.isclose(new_coverage, modulated, atol=1.e-6)
    marked_positions = np.sum(near_modulated, axis=2) != 0
    assert (np.sum(marked_positions, axis=1) == lengths).all()
Esempio n. 3
0
def test_gru_coverage():
    """Check that a GRU coverage update keeps the coverage shape.

    The coverage update symbol for ``coverage_type="gru"`` is built from
    symbolic inputs, bound on CPU, fed with randomly generated inputs and run
    forward once. Only the shape of the resulting coverage is asserted.
    """
    batch_size, source_seq_len = 4, 10
    encoder_num_hidden, decoder_num_hidden, coverage_num_hidden = 5, 5, 2

    # (argument name, shape) pairs, listed in the order the inputs are created and fed.
    input_shapes = (
        ("source", (batch_size, source_seq_len, encoder_num_hidden)),
        ("source_length", (batch_size, )),
        ("prev_hidden", (batch_size, decoder_num_hidden)),
        ("prev_coverage", (batch_size, source_seq_len, coverage_num_hidden)),
        ("attention_scores", (batch_size, source_seq_len)),
    )
    shape_of = dict(input_shapes)
    sym = {name: mx.sym.Variable(name) for name, _ in input_shapes}

    # Random input values; the helpers are called in a fixed order.
    feed = [
        ("source", gaussian_vector(shape=shape_of["source"])),
        ("source_length", integer_vector(shape=shape_of["source_length"],
                                         max_value=source_seq_len)),
        ("prev_hidden", gaussian_vector(shape=shape_of["prev_hidden"])),
        ("prev_coverage", gaussian_vector(shape=shape_of["prev_coverage"])),
    ]
    raw_scores = uniform_vector(shape=shape_of["attention_scores"])
    # NOTE(review): np.sum has no axis here, so the scores are scaled by their
    # total over the whole array rather than per sentence - confirm intended.
    feed.append(("attention_scores", raw_scores / np.sum(raw_scores)))

    coverage = sockeye.coverage.get_coverage(
        coverage_type="gru", coverage_num_hidden=coverage_num_hidden)
    update_fn = coverage.on(sym["source"], sym["source_length"], source_seq_len)
    updated = update_fn(sym["prev_hidden"], sym["attention_scores"],
                        sym["prev_coverage"])

    executor = updated.simple_bind(ctx=mx.cpu(), **shape_of)
    for name, values in feed:
        executor.arg_dict[name][:] = values

    new_coverage = executor.forward()[0].asnumpy()

    assert new_coverage.shape == shape_of["prev_coverage"]
Esempio n. 4
0
def _test_activation_coverage(act_type):
    """Run one forward pass of an activation-based coverage model and check its output.

    A ``CoverageConfig`` is built for ``act_type``, the coverage update symbol is
    bound on CPU and fed with randomly generated inputs. The test asserts the
    output shape and a per-sentence count derived from the activation of zero.

    :param act_type: Activation type. Used as the coverage ``type`` and as the
        ``act_type`` of ``mx.nd.Activation``.
    """
    config_coverage = sockeye.coverage.CoverageConfig(type=act_type, num_hidden=2, layer_normalization=False)
    batch_size, source_seq_len = 4, 10
    encoder_num_hidden, decoder_num_hidden = 5, 5

    # (argument name, shape) pairs, listed in the order the inputs are created and fed.
    input_shapes = (
        ("source", (batch_size, source_seq_len, encoder_num_hidden)),
        ("source_length", (batch_size,)),
        ("prev_hidden", (batch_size, decoder_num_hidden)),
        ("prev_coverage", (batch_size, source_seq_len, config_coverage.num_hidden)),
        ("attention_scores", (batch_size, source_seq_len)),
    )
    shape_of = dict(input_shapes)
    sym = {name: mx.sym.Variable(name) for name, _ in input_shapes}

    # Random input values; the helpers are called in a fixed order.
    source_values = gaussian_vector(shape=shape_of["source"])
    lengths = integer_vector(shape=shape_of["source_length"], max_value=source_seq_len)
    hidden_values = gaussian_vector(shape=shape_of["prev_hidden"])
    coverage_values = gaussian_vector(shape=shape_of["prev_coverage"])
    raw_scores = uniform_vector(shape=shape_of["attention_scores"])
    # NOTE(review): np.sum has no axis here, so the scores are scaled by their
    # total over the whole array rather than per sentence - confirm intended.
    score_values = raw_scores / np.sum(raw_scores)

    coverage = sockeye.coverage.get_coverage(config_coverage)
    update_fn = coverage.on(sym["source"], sym["source_length"], source_seq_len)
    updated = update_fn(sym["prev_hidden"], sym["attention_scores"], sym["prev_coverage"])

    executor = updated.simple_bind(ctx=mx.cpu(), **shape_of)
    feed = (
        ("source", source_values),
        ("source_length", lengths),
        ("prev_hidden", hidden_values),
        ("prev_coverage", coverage_values),
        ("attention_scores", score_values),
    )
    for name, values in feed:
        executor.arg_dict[name][:] = values

    new_coverage = executor.forward()[0].asnumpy()
    assert new_coverage.shape == shape_of["prev_coverage"]

    # Value of the activation for a zero input; it depends on the activation type.
    modulated = mx.nd.Activation(mx.nd.zeros((1, 1)), act_type=act_type).asnumpy()
    # Mark every (sentence, position) where at least one coverage unit is
    # within 1e-6 of that value, then count the marked positions per sentence.
    near_modulated = np.isclose(new_coverage, modulated, atol=1.e-6)
    marked_positions = np.sum(near_modulated, axis=2) != 0
    assert (np.sum(marked_positions, axis=1) == lengths).all()
Esempio n. 5
0
def _test_gru_coverage():
    """Run one forward pass of a GRU coverage model and check its output.

    A ``CoverageConfig`` with ``type="gru"`` is built, the coverage update symbol
    is bound on CPU and fed with randomly generated inputs. The test asserts the
    output shape and a per-sentence count of positions whose coverage is not 1.
    """
    config_coverage = sockeye.coverage.CoverageConfig(type="gru", num_hidden=2, layer_normalization=False)
    batch_size, source_seq_len = 4, 10
    encoder_num_hidden, decoder_num_hidden = 5, 5

    # (argument name, shape) pairs, listed in the order the inputs are created and fed.
    input_shapes = (
        ("source", (batch_size, source_seq_len, encoder_num_hidden)),
        ("source_length", (batch_size,)),
        ("prev_hidden", (batch_size, decoder_num_hidden)),
        ("prev_coverage", (batch_size, source_seq_len, config_coverage.num_hidden)),
        ("attention_scores", (batch_size, source_seq_len)),
    )
    shape_of = dict(input_shapes)
    sym = {name: mx.sym.Variable(name) for name, _ in input_shapes}

    # Random input values; the helpers are called in a fixed order.
    source_values = gaussian_vector(shape=shape_of["source"])
    lengths = integer_vector(shape=shape_of["source_length"], max_value=source_seq_len)
    hidden_values = gaussian_vector(shape=shape_of["prev_hidden"])
    coverage_values = gaussian_vector(shape=shape_of["prev_coverage"])
    raw_scores = uniform_vector(shape=shape_of["attention_scores"])
    # NOTE(review): np.sum has no axis here, so the scores are scaled by their
    # total over the whole array rather than per sentence - confirm intended.
    score_values = raw_scores / np.sum(raw_scores)

    coverage = sockeye.coverage.get_coverage(config_coverage)
    update_fn = coverage.on(sym["source"], sym["source_length"], source_seq_len)
    updated = update_fn(sym["prev_hidden"], sym["attention_scores"], sym["prev_coverage"])

    executor = updated.simple_bind(ctx=mx.cpu(), **shape_of)
    feed = (
        ("source", source_values),
        ("source_length", lengths),
        ("prev_hidden", hidden_values),
        ("prev_coverage", coverage_values),
        ("attention_scores", score_values),
    )
    for name, values in feed:
        executor.arg_dict[name][:] = values

    new_coverage = executor.forward()[0].asnumpy()
    assert new_coverage.shape == shape_of["prev_coverage"]

    # Mark every (sentence, position) where at least one coverage unit differs
    # from 1, then count the marked positions per sentence.
    differs_from_one = new_coverage != 1
    marked_positions = np.sum(differs_from_one, axis=2) != 0
    assert (np.sum(marked_positions, axis=1) == lengths).all()