Example #1
def _test_gather_random_array(test_case, device):
    input = np.random.randn(3, 4, 3, 5)
    index = np.random.choice(np.arange(3), size=180, replace=True).reshape(
        (3, 4, 3, 5))
    np_out = np.take_along_axis(input, index, 1)
    output = flow.gather(
        flow.tensor(input, dtype=flow.float32, device=flow.device(device)),
        1,
        flow.tensor(index, dtype=flow.int, device=flow.device(device)),
    )
    test_case.assertTrue(np.allclose(output.numpy(), np_out))
    np_out2 = np.take_along_axis(input, index, 2)
    output2 = flow.gather(
        flow.tensor(input, dtype=flow.float32, device=flow.device(device)),
        2,
        flow.tensor(index, dtype=flow.int, device=flow.device(device)),
    )
    test_case.assertTrue(np.allclose(output2.numpy(), np_out2))
    np_out3 = np.take_along_axis(input, index, 3)
    output3 = flow.gather(
        flow.tensor(input, dtype=flow.float32, device=flow.device(device)),
        3,
        flow.tensor(index, dtype=flow.int, device=flow.device(device)),
    )
    test_case.assertTrue(np.allclose(output3.numpy(), np_out3))
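The test above relies on flow.gather following the same dim-indexed semantics as np.take_along_axis: along dim d, the index value is substituted at position d of the input subscript. A minimal standalone sketch of the 2-D case (a hedged usage example, assuming eager oneflow):

import numpy as np
import oneflow as flow

# Along dim 0: out[i][j] = inp[idx[i][j]][j]
inp = flow.tensor([[1.0, 2.0], [3.0, 4.0]])
idx = flow.tensor([[0, 0], [1, 0]], dtype=flow.int64)
out = flow.gather(inp, 0, idx)
print(np.allclose(out.numpy(), np.take_along_axis(inp.numpy(), idx.numpy(), 0)))  # True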
Example #2
def _model(dense_fields, wide_sparse_fields, deep_sparse_fields):
    wide_sparse_fields = flow.parallel_cast(
        wide_sparse_fields, distribute=flow.distribute.broadcast())
    wide_embedding_table = flow.get_variable(
        name='wide_embedding',
        shape=(FLAGS.wide_vocab_size, 1),
        initializer=flow.random_uniform_initializer(minval=-0.05, maxval=0.05),
        distribute=flow.distribute.split(0),
    )
    wide_embedding = flow.gather(params=wide_embedding_table,
                                 indices=wide_sparse_fields)
    wide_embedding = flow.reshape(wide_embedding,
                                  shape=(-1, wide_embedding.shape[-1] *
                                         wide_embedding.shape[-2]))
    wide_scores = flow.math.reduce_sum(wide_embedding, axis=[1], keepdims=True)
    wide_scores = flow.parallel_cast(
        wide_scores,
        distribute=flow.distribute.split(0),
        gradient_distribute=flow.distribute.broadcast())

    deep_sparse_fields = flow.parallel_cast(
        deep_sparse_fields, distribute=flow.distribute.broadcast())
    deep_embedding_table = flow.get_variable(
        name='deep_embedding',
        shape=(FLAGS.deep_vocab_size, FLAGS.deep_embedding_vec_size),
        initializer=flow.random_uniform_initializer(minval=-0.05, maxval=0.05),
        distribute=flow.distribute.split(1),
    )
    deep_embedding = flow.gather(params=deep_embedding_table,
                                 indices=deep_sparse_fields)
    deep_embedding = flow.parallel_cast(
        deep_embedding,
        distribute=flow.distribute.split(0),
        gradient_distribute=flow.distribute.split(2))
    deep_embedding = flow.reshape(deep_embedding,
                                  shape=(-1, deep_embedding.shape[-1] *
                                         deep_embedding.shape[-2]))
    deep_features = flow.concat([deep_embedding, dense_fields], axis=1)
    for idx, units in enumerate(DEEP_HIDDEN_UNITS):
        deep_features = flow.layers.dense(
            deep_features,
            units=units,
            kernel_initializer=flow.glorot_uniform_initializer(),
            bias_initializer=flow.constant_initializer(0.0),
            activation=flow.math.relu,
            name='fc' + str(idx + 1))
        deep_features = flow.nn.dropout(deep_features,
                                        rate=FLAGS.deep_dropout_rate)
    deep_scores = flow.layers.dense(
        deep_features,
        units=1,
        kernel_initializer=flow.glorot_uniform_initializer(),
        bias_initializer=flow.constant_initializer(0.0),
        name='fc' + str(len(DEEP_HIDDEN_UNITS) + 1))

    scores = wide_scores + deep_scores
    return scores
Example #3
File: sort.py Project: zzk0/oneflow
def sort_op(input, dim: int = -1, descending: bool = False):
    num_dims = len(input.shape)
    dim = dim if dim >= 0 else dim + num_dims
    direction = "DESCENDING" if descending else "ASCENDING"
    assert 0 <= dim < num_dims, "dim out of range"
    if dim == num_dims - 1:
        indices = flow._C.arg_sort(input, direction)
        return (flow.gather(input, dim, indices), indices)
    else:
        perm = get_perm_when_transpose_axis_to_last_dim(num_dims, dim)
        x = flow._C.transpose(input, perm=perm)
        indices = flow._C.arg_sort(x, direction)
        indices = flow._C.transpose(indices, perm=get_inversed_perm(perm))
        return (flow.gather(input, dim, indices), indices)
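get_perm_when_transpose_axis_to_last_dim and get_inversed_perm are not shown in this snippet; a plausible minimal sketch of what they compute (a permutation that moves `dim` to the last axis, and its inverse):

def get_perm_when_transpose_axis_to_last_dim(num_dims, axis):
    # e.g. num_dims=4, axis=1 -> (0, 2, 3, 1)
    perm = [d for d in range(num_dims) if d != axis]
    perm.append(axis)
    return tuple(perm)


def get_inversed_perm(perm):
    # inverse[perm[i]] = i, so transposing by perm and then by
    # its inverse restores the original layout
    inverse = [0] * len(perm)
    for i, p in enumerate(perm):
        inverse[p] = i
    return tuple(inverse)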
Example #4
def positional_encoding(position, d_model, name="positional_encoding"):
    """
    Do positional encoding
    :param position: The position
    :param d_model: The hidden dimension in model
    :return: shape like (1, position, d_model)
    """
    with flow.scope.namespace(name):
        # shape = (position, 1)
        input_pos = flow.expand_dims(flow.range(position, dtype=flow.float32, name="pos"), axis=1)

        # shape = (1, d_model)
        input_d_model = flow.expand_dims(flow.range(d_model, dtype=flow.float32, name="d_model"), axis=0)

        angle_rads = get_angles(input_pos, input_d_model, d_model)

        # Build the even index range (0, 2, 4, ...)
        even_range = flow.range(0, d_model, 2, dtype=flow.int32, name="even_range")

        # Apply sin at the even indices
        even_out = flow.math.sin(flow.gather(angle_rads, even_range, axis=1))

        # Build the odd index range (1, 3, 5, ...)
        odd_range = flow.range(1, d_model, 2, dtype=flow.int32, name="odd_range")

        # Apply cos at the odd indices
        odd_out = flow.math.cos(flow.gather(angle_rads, odd_range, axis=1))

        # Initialize the position-encoding constant
        position_encode = flow.constant(0, dtype=flow.float32, shape=(d_model, position), name="pos_encode")

        # Because scatter only supports row indices, we need to transpose
        even_out = flow.tensor_scatter_nd_update(position_encode,
                                                 flow.expand_dims(even_range, axis=1),
                                                 flow.transpose(even_out, perm=[1, 0]))

        odd_out = flow.tensor_scatter_nd_update(position_encode,
                                                flow.expand_dims(odd_range, axis=1),
                                                flow.transpose(odd_out, perm=[1, 0]))

        # Combine the even-index and odd-index values
        out = even_out + odd_out

        # Because we transposed in even_out and odd_out, we need to transpose back
        out = flow.transpose(out, perm=[1, 0])
        # Expand dims at axis 0 to get shape (1, position, d_model)
        out = flow.expand_dims(out, axis=0)

    return out
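get_angles is not shown above; in the standard Transformer formulation it computes pos / 10000^(2*(i//2)/d_model). A numpy reference of the full encoding for comparison (a sketch under that assumption):

import numpy as np

def positional_encoding_np(position, d_model):
    # angle(pos, i) = pos / 10000 ** (2 * (i // 2) / d_model)
    pos = np.arange(position)[:, None].astype(np.float32)
    i = np.arange(d_model)[None, :]
    angle_rads = pos / np.power(10000, 2 * (i // 2) / np.float32(d_model))
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])  # sin at even indices
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])  # cos at odd indices
    return angle_rads[None, ...]  # shape (1, position, d_model)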
Example #5
def _GatherIndexes(sequence_blob, positions_blob, seq_length, hidden_size):
    output = flow.gather(params=sequence_blob,
                         indices=positions_blob,
                         axis=2,
                         batch_dims=2)
    output = flow.reshape(output, [-1, hidden_size])
    return output
Example #6
 def test_gather_dim_value_runtime_error(test_case):
     with test_case.assertRaises(Exception) as context:
         x1 = flow.ones((2, 2), dtype=flow.float32, requires_grad=True)
         x2 = flow.ones((2, 2), dtype=flow.int64)
         y = flow.gather(x1, 2, x2)
     test_case.assertTrue(
         "Dimension out of range" in str(context.exception))
Example #7
 def test_gather_size_runtime_error(test_case):
     with test_case.assertRaises(Exception) as context:
         x1 = flow.ones((2, 2), dtype=flow.float32, requires_grad=True)
         x2 = flow.ones((4, 2), dtype=flow.int64)
         y = flow.gather(x1, 1, x2)
     test_case.assertTrue(
         "Size does not match at dimension" in str(context.exception))
Example #8
    def get_masked_lm_loss(
        logit_blob,
        masked_lm_positions,
        masked_lm_labels,
        label_weights,
        max_prediction_per_seq,
    ):
        # gather valid position indices
        logit_blob = flow.gather(
            logit_blob,
            index=masked_lm_positions.unsqueeze(2).repeat(
                1, 1, args.vocab_size),
            dim=1,
        )

        logit_blob = flow.reshape(logit_blob, [-1, args.vocab_size])
        label_id_blob = flow.reshape(masked_lm_labels, [-1])

        # The `positions` tensor might be zero-padded (if the sequence is too
        # short to have the maximum number of predictions). The `label_weights`
        # tensor has a value of 1.0 for every real prediction and 0.0 for the
        # padding predictions.
        pre_example_loss = mlm_criterion(logit_blob, label_id_blob)
        pre_example_loss = flow.reshape(pre_example_loss,
                                        [-1, max_prediction_per_seq])
        numerator = flow.sum(pre_example_loss * label_weights)
        denominator = flow.sum(label_weights) + 1e-5
        loss = numerator / denominator
        return loss
Example #9
def get_masked_lm_loss(
    logit_blob,
    masked_lm_positions,
    masked_lm_labels,
    label_weights,
    max_prediction_per_seq=20,
):
    # gather valid position indices
    logit_blob = flow.gather(
        logit_blob,
        index=masked_lm_positions.unsqueeze(2).repeat(1, 1, 30522),
        dim=1,
    )
    logit_blob = flow.reshape(logit_blob, [-1, 30522])
    label_id_blob = flow.reshape(masked_lm_labels, [-1])

    # The `positions` tensor might be zero-padded (if the sequence is too
    # short to have the maximum number of predictions). The `label_weights`
    # tensor has a value of 1.0 for every real prediction and 0.0 for the
    # padding predictions.
    pre_example_loss = nn.CrossEntropyLoss(reduction="none")(logit_blob,
                                                             label_id_blob)
    pre_example_loss = flow.reshape(pre_example_loss,
                                    [-1, max_prediction_per_seq])
    sum_label_weight = flow.sum(label_weights, dim=-1)
    sum_label_weight = sum_label_weight / label_weights.shape[0]
    numerator = flow.sum(pre_example_loss * label_weights)
    denominator = flow.sum(label_weights) + 1e-5
    loss = numerator / denominator
    return logit_blob, loss
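The unsqueeze(2).repeat(...) idiom above expands the position index to [batch, num_pred, vocab] so that flow.gather along dim=1 picks the full logit row at each masked position. A shape-only sketch with a toy vocabulary (hypothetical sizes, assuming eager oneflow):

import oneflow as flow

batch, seq_len, vocab = 2, 4, 5
logits = flow.randn(batch, seq_len, vocab)
positions = flow.tensor([[1, 3], [0, 2]], dtype=flow.int64)  # [batch, num_pred]

index = positions.unsqueeze(2).repeat(1, 1, vocab)  # [batch, num_pred, vocab]
picked = flow.gather(logits, 1, index)              # logit rows at masked positions
print(picked.shape)  # (2, 2, 5)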
Example #10
def shuffle(
    value: remote_blob_util.BlobDef,
    seed: Optional[int] = None,
    name: Optional[str] = None,
) -> remote_blob_util.BlobDef:
    return flow.gather(value,
                       generate_random_batch_permutation_indices(value, seed))
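generate_random_batch_permutation_indices is not shown here; conceptually it produces a random permutation of [0, batch_size) to gather with. A hypothetical eager-mode stand-in, not the library's implementation:

import numpy as np
import oneflow as flow

def generate_random_batch_permutation_indices(value, seed=None):
    # a random permutation of the batch dimension, as int32 indices
    rng = np.random.default_rng(seed)
    return flow.tensor(rng.permutation(value.shape[0]).astype(np.int32))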
Example #11
 def test_gather_index_type_runtime_error(test_case):
     with test_case.assertRaises(Exception) as context:
         x1 = flow.ones((2, 2), dtype=flow.float32, requires_grad=True)
         x2 = flow.ones((2, 2), dtype=flow.float32)
         y = flow.gather(x1, 1, x2)
     test_case.assertTrue(
         "gather(): Expected dtype int32 or int64 for index" in str(
             context.exception))
Example #12
 def gather_model_parallel_fw_job(
         params: oft.Numpy.Placeholder(params_shape, dtype=flow.float),
         indices: oft.Numpy.Placeholder(indices_shape, dtype=flow.int32),
 ):
     with flow.scope.placement(device_type, "0:0-3"):
         params = params.with_distribute(flow.distribute.split(split_axis))
         indices = indices.with_distribute(flow.distribute.broadcast())
         return flow.gather(params=params, indices=indices, axis=axis)
Example #13
def _test_gather_input_0dim_tensor(test_case, device):
    input = flow.tensor(1.0).to(device)
    input.requires_grad = True
    index = flow.tensor([0]).to(device)
    output = flow.gather(input, 0, index)
    test_case.assertTrue(np.array_equal(output.numpy(), [1.0]))
    output.sum().backward()
    test_case.assertTrue(np.array_equal(input.grad.numpy(), 1.0))
Example #14
 def test_gather_dim_equal_runtime_error(test_case):
     with test_case.assertRaises(Exception) as context:
         x1 = flow.ones((2, 2), dtype=flow.float32, requires_grad=True)
         x2 = flow.ones((2, 2, 2), dtype=flow.int64)
         y = flow.gather(x1, 1, x2)
     test_case.assertTrue(
         "Index tensor must have the same number of dimensions as input tensor"
         in str(context.exception))
Example #15
File: moe.py Project: Oneflow-Inc/models
    def _prob_in_top_k(
        self, clean_values, noisy_values, noise_stddev, noisy_top_values
    ):
        """Helper function to NoisyTopKGating.
        Computes the probability that each value is in the top k, given different random noise.
        This gives us a way of backpropagating from a loss that balances the number
        of times each expert is in the top k experts per example.
        In the case of no noise, pass in None for noise_stddev, and the result will
        not be differentiable.
        Args:
        clean_values: a `Tensor` of shape [batch, n].
        noisy_values: a `Tensor` of shape [batch, n].  Equal to clean values plus
          normally distributed noise with standard deviation noise_stddev.
        noise_stddev: a `Tensor` of shape [batch, n], or None
        noisy_top_values: a `Tensor` of shape [batch, m].
           "values" Output of tf.top_k(noisy_top_values, m).  m >= k+1
        Returns:
        a `Tensor` of shape [batch, n].
        """

        batch = clean_values.size(0)
        m = noisy_top_values.size(1)
        top_values_flat = noisy_top_values.flatten()

        threshold_positions_if_in = (
            flow.arange(batch, device=noisy_values.device) * m + self.k
        )

        threshold_if_in = flow.unsqueeze(
            flow.gather(top_values_flat, 0, threshold_positions_if_in), 1
        )
        is_in = flow.gt(noisy_values, threshold_if_in)

        threshold_positions_if_out = threshold_positions_if_in - 1
        threshold_if_out = flow.unsqueeze(
            flow.gather(top_values_flat, 0, threshold_positions_if_out), 1
        )

        # is each value currently in the top k.
        prob_if_in = cdf((clean_values - threshold_if_in) / noise_stddev)
        prob_if_out = cdf((clean_values - threshold_if_out) / noise_stddev)

        prob = flow.where(is_in, prob_if_in, prob_if_out)
        return prob
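threshold_positions_if_in works by flattening the [batch, m] top values and addressing row i's k-th entry at flat offset i * m + k. A small sketch of that flat-gather trick (toy sizes, assuming eager oneflow):

import oneflow as flow

batch, m, k = 3, 4, 2
top_values_flat = flow.randn(batch, m).flatten()
# row i's k-th entry lives at flat offset i * m + k
positions = flow.arange(batch) * m + k
kth_per_row = flow.gather(top_values_flat, 0, positions)
print(kth_per_row.shape)  # (3,)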
Example #16
def _EmbeddingLookup(input_ids_blob,
                     vocab_size,
                     embedding_size=128,
                     initializer_range=0.02,
                     word_embedding_name="word_embeddings"):
  embedding_table = flow.get_variable(name=word_embedding_name, shape=[vocab_size, embedding_size],
                                      dtype=flow.float,
                                      initializer=CreateInitializer(initializer_range))
  output = flow.gather(params=embedding_table, indices=input_ids_blob, axis=0)
  return output, embedding_table
Example #17
def _test_gather(test_case, device):
    input = np.array([[1, 2], [3, 4]])
    index = np.array([[0, 0], [1, 0]])
    np_out = np.take_along_axis(input, index, 0)
    output = flow.gather(
        flow.tensor(input, dtype=flow.float32, device=flow.device(device)),
        0,
        flow.tensor(index, dtype=flow.int, device=flow.device(device)),
    )
    test_case.assertTrue(np.array_equal(output.numpy(), np_out))
Example #18
def flip(x, dim):
    xsize = x.size()
    dim = x.dim() + dim if dim < 0 else dim
    x = flow.reshape(x, shape=(-1, *xsize[dim:]))
    x = flow.reshape(x, shape=(x.size(0), x.size(1), -1))

    index = []
    index1 = []
    for i in range(x.size(1) - 1, -1, -1):
        index1.append([i] * x.size(2))
    for i in range(x.size(0)):
        index.append(index1)
    index = flow.Tensor(index).long()

    if x.is_cuda:
        x = flow.gather(x, 1, index.to("cuda"))
    else:
        x = flow.gather(x, 1, index)

    return flow.reshape(x, shape=xsize)
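A quick sanity check of flip against np.flip (a hedged usage sketch, assuming eager oneflow on CPU):

import numpy as np
import oneflow as flow

x = flow.tensor(np.arange(6, dtype=np.float32).reshape(2, 3))
out = flip(x, dim=1)
print(np.allclose(out.numpy(), np.flip(x.numpy(), axis=1)))  # expected: True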
Example #19
    def forward(self, sequence_output, masked_lm_positions):
        # Gather masked outputs
        masked_sequence_output = flow.gather(
            sequence_output,
            index=masked_lm_positions.unsqueeze(2).expand(-1, -1, self.hidden_size),
            dim=1,
        )
        masked_sequence_output = masked_sequence_output.reshape([-1, self.hidden_size])

        masked_sequence_output = self.transform(masked_sequence_output)
        masked_sequence_output = self.decoder(masked_sequence_output)
        return masked_sequence_output
Example #20
 def do_gather(x_blob, i_blob):
     with flow.scope.placement(device_type, "0:0"):
         x = flow.get_variable(
             "params",
             shape=params.shape,
             dtype=flow.float32,
             initializer=flow.constant_initializer(0),
         )
         x = x + x_blob
         y = flow.gather(x, i_blob, axis=axis, batch_dims=batch_dims)
         flow.losses.add_loss(y)
     flow.watch_diff(x, compare_fn)
     return y
Example #21
def variable_scope_test_job_2(a=of.FixedTensorDef((2, 5))):
    with of.scope.namespace("job2_scope1"):
        indices = of.get_variable(
            "gather_inds",
            shape=(2, ),
            dtype=of.int32,
            initializer=of.constant_initializer(1),
            trainable=False,
        )
        output = of.gather(a, indices, axis=1)

    print("indices op name: ", indices.op_name)
    print("gather op name: ", output.op_name)
    return output
Example #22
 def forward(self, input):
     if self.dim is None:
         self.dim = -1
     num_axes = len(input.shape)
     axis = self.dim if self.dim >= 0 else self.dim + num_axes
     assert 0 <= axis < num_axes, "axis out of range"
     if axis == num_axes - 1:
         if self.largest:
             indices = flow._C.top_k(input, self.k)
         else:
             neg_input = flow.mul(input, -1)
             indices = flow._C.top_k(neg_input, self.k)
         return (flow.gather(input, axis, indices), indices)
     else:
         perm = get_perm_when_transpose_axis_to_last_dim(num_axes, axis)
         x = flow._C.transpose(input, perm=perm)
         if self.largest:
             indices = flow._C.top_k(x, self.k)
         else:
             neg_input = flow.mul(x, -1)
             indices = flow._C.top_k(neg_input, self.k)
         indices = flow._C.transpose(indices, perm=get_inversed_perm(perm))
         return (flow.gather(input, axis, indices), indices)
Example #23
def _test_gather_backward(test_case, device):
    input = np.array([[1, 2], [3, 4]])
    index = np.array([[0, 0], [1, 0]])
    np_out = np.take_along_axis(input, index, 0)
    np_grad = _scatter_add_numpy(np.ones_like(np_out), 0, index, input.shape)
    of_input = flow.tensor(
        input, dtype=flow.float32, requires_grad=True, device=flow.device(device)
    )
    output = flow.gather(
        of_input, 0, flow.tensor(index, dtype=flow.int64, device=flow.device(device)),
    )
    out_sum = output.sum()
    out_sum.backward()
    test_case.assertTrue(np.array_equal(output.numpy(), np_out))
    test_case.assertTrue(np.array_equal(of_input.grad.numpy(), np_grad))
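_scatter_add_numpy is not shown in this snippet; a plausible sketch of the reference gradient it computes (scatter-add back to the input shape, the adjoint of gather):

import numpy as np

def _scatter_add_numpy(src, dim, index, out_shape):
    # gather's gradient: out[..., index[pos], ...] += src[pos] along `dim`
    out = np.zeros(out_shape, dtype=src.dtype)
    for pos in np.ndindex(*index.shape):
        target = list(pos)
        target[dim] = index[pos]
        out[tuple(target)] += src[pos]
    return out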
Example #24
 def do_gather(x_blob, i_blob):
     with flow.scope.placement(device_type, "0:0"):
         x = flow.get_variable(
             "params",
             shape=params.shape,
             dtype=flow.float32,
             initializer=flow.constant_initializer(0),
         )
         x = x + x_blob
         y = flow.gather(x, i_blob, axis=axis, batch_dims=batch_dims)
         lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([],
                                                                  [1e-3])
         flow.optimizer.SGD(lr_scheduler, momentum=0).minimize(y)
     flow.watch_diff(x, compare_fn)
     return y
Example #25
File: bert.py Project: zyg11/oneflow
def _EmbeddingPostprocessor(
    input_blob,
    seq_length,
    embedding_size,
    use_token_type=False,
    token_type_ids_blob=None,
    token_type_vocab_size=16,
    token_type_embedding_name="token_type_embeddings",
    use_position_embeddings=True,
    position_embedding_name="position_embeddings",
    initializer_range=0.02,
    max_position_embeddings=512,
    dropout_prob=0.1,
):
    output = input_blob

    if use_token_type:
        assert token_type_ids_blob is not None
        token_type_table = flow.get_variable(
            name=token_type_embedding_name,
            shape=[token_type_vocab_size, embedding_size],
            dtype=input_blob.dtype,
            initializer=CreateInitializer(initializer_range),
        )
        token_type_embeddings = flow.gather(
            params=token_type_table, indices=token_type_ids_blob, axis=0
        )
        output = output + token_type_embeddings

    if use_position_embeddings:
        position_table = flow.get_variable(
            name=position_embedding_name,
            shape=[1, max_position_embeddings, embedding_size],
            dtype=input_blob.dtype,
            initializer=CreateInitializer(initializer_range),
        )
        assert seq_length <= max_position_embeddings
        if seq_length != max_position_embeddings:
            position_table = flow.slice(
                position_table, begin=[None, 0, 0], size=[None, seq_length, -1]
            )
        output = output + position_table

    output = _LayerNorm(output, embedding_size)
    output = _Dropout(output, dropout_prob)

    return output
Example #26
def _test_batch_gather(test_case, shape, device):
    # for example: shape = (3, 2, 2)
    x = np.random.randn(*shape)
    x_tensor = flow.Tensor(x).to(device)
    x_tensor.requires_grad = True
    batchsize = x.shape[0]
    init_index = np.array([
        np.random.randint(batchsize) for i in range(batchsize)
    ]).astype(np.int64)

    batch_gather_index = flow.tensor(init_index).to(device)
    batch_gather_out = flow.batch_gather(x_tensor, batch_gather_index)

    x_tensor_gather = flow.Tensor(x).to(device)
    x_tensor_gather.requires_grad = True
    reshaped_shape = [batchsize]  # reshaped_shape = [3]
    for i in range(len(x.shape) - 1):
        reshaped_shape.append(1)  # reshaped_shape = [3] -> [3, 1, 1]

    gather_index = np.reshape(init_index, reshaped_shape)
    gather_index = np.broadcast_to(gather_index, shape).astype(
        np.int64)  # [3, 1, 1] -> [3, 2, 2]
    gather_index = flow.tensor(gather_index).to(device)
    gather_out = flow.gather(x_tensor_gather, 0, gather_index)
    total_out = batch_gather_out.sum() + gather_out.sum()
    total_out.backward()

    test_case.assertTrue(
        np.allclose(batch_gather_out.numpy(),
                    gather_out.numpy(),
                    atol=1e-4,
                    rtol=1e-4))

    test_case.assertTrue(
        np.allclose(
            x_tensor.grad.numpy(),
            x_tensor_gather.grad.numpy(),
            atol=1e-4,
            rtol=1e-4,
        ))
Example #27
def GPT(idx, config, target=None):
    b, t = idx.shape
    assert t <= config.block_size, "Cannot forward, model block size is exhausted."

    # forward the GPT model
    word_embedding = flow.get_variable(
        'word_emb',
        initializer=flow.random_normal_initializer(),
        shape=(config.vocab_size, config.n_embd))
    token_embeddings = flow.gather(word_embedding, idx)

    # position embedding
    pos_emb = flow.get_variable(name='pos_emb',
                                shape=(1, config.block_size, config.n_embd),
                                dtype=flow.float32,
                                initializer=flow.zeros_initializer())
    # each position maps to a (learnable) vector
    position_embeddings = flow.slice(pos_emb, [None, 0, None], [None, t, None])
    x = flow.nn.dropout((token_embeddings + position_embeddings),
                        config.embd_pdrop)

    #Blocks
    for block_id in range(config.n_layer):
        with flow.scope.namespace('Block' + str(block_id)):
            x = Block(x, config)

    x = flow.layers.layer_norm(x, name='output_layernorm')

    logits = flow.layers.dense(x,
                               config.vocab_size,
                               use_bias=False,
                               name='output_logits')

    loss = None
    if target is not None:
        #TODO
        logits = flow.reshape(logits, [-1, config.vocab_size])
        target = flow.reshape(target, [-1])
        target = flow.one_hot(target,
                              depth=config.vocab_size,
                              dtype=flow.float32)
        loss = flow.nn.softmax_cross_entropy_with_logits(logits, target)
    return logits, loss
Example #28
    def testIndexedSlicesSGD(
        sparse_ids: flow.typing.Numpy.Placeholder(ids_shape, dtype=flow.int32),
    ) -> flow.typing.Numpy:
        with flow.scope.placement(device_type, "0:0"):
            embedding_table = flow.get_variable(
                name="embeddings",
                shape=model_shape,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
            )
            embedding = flow.gather(params=embedding_table * mul_scalar,
                                    indices=sparse_ids)
            loss = flow.math.reduce_mean(embedding)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [learning_rate]),
                momentum=momentum_beta,
            ).minimize(loss)

            return embedding_table
Example #29
def EmbeddingLayer(input_ids_blob,
                   vocab_size,
                   embedding_size=128,
                   initializer_range=0.02,
                   word_embedding_name="Embedding_Layer"):
    """
    Embedding Layer
    :param input_ids_blob:The input ID Blob
    :param vocab_size: The input Vocab size
    :param embedding_size: The embedding Size
    :param initializer_range: The range of Initializer, Use flow.truncated_normal
    :param word_embedding_name: The name of Embedding variable
    :return: The output and the Embedding table.
    """
    embedding_table = flow.get_variable(name=word_embedding_name+"_Embed",
                                        shape=[vocab_size, embedding_size],
                                        dtype=flow.float32,
                                        initializer=flow.truncated_normal(initializer_range))
    output = flow.gather(params=embedding_table, indices=input_ids_blob, axis=0)
    return output
Example #30
    def __call__(self, x):
        """
        Get embeddings of x
        :param x: An flow.int64 Tensor with shape [batchsize, length]
        :return: embeddings: float32 tensor with shape [batch_size, length, embedding_size]
                 padding: float32 tensor with shape [batch_size, length] indicating the
                 locations of the padding tokens in x.
        """
        with flow.scope.namespace("embedding"):
            embeddings = flow.gather(self.embedding_table, x, axis=0)

            # Scale embedding by the sqrt of the hidden size
            embeddings *= self.hidden_size**0.5

            # Create binary array of size [batch_size, length]
            # where 1 = padding, 0 = not padding
            padding = model_utils.get_padding(x)

            # Set all padding embedding values to 0
            embeddings *= flow.expand_dims(1 - padding, -1)
            return embeddings
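model_utils.get_padding is not shown in this snippet; a plausible sketch under the common convention that token id 0 marks padding:

import oneflow as flow

def get_padding(x, padding_value=0):
    # 1.0 where x equals the padding id, 0.0 elsewhere
    return (x == padding_value).to(flow.float32)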