Ejemplo n.º 1
0
 def _feed_batchs(_start_idx, _inpf):
     """Feed the `_start_idx`-th slice of `d` through the entries of `_inpf`.

     `d`, `p`, `n`, `data`, `n_samples` and `n_samples_per_gpu` are free
     variables resolved outside this function.

     Args:
         _start_idx: Index of the slice to feed; slice `i` covers
           `d[i * n_samples_per_gpu:(i + 1) * n_samples_per_gpu]`.
         _inpf: Mapping looked up with the ids / length key names
           (presumably a dict of placeholders -- confirm against caller).

     Returns: The number of length entries fed for this slice, or 0 when
       the slice would start at or past `n_samples`.
     """
     begin = _start_idx * n_samples_per_gpu
     if begin >= n_samples:
         # Nothing left for this slice index.
         return 0
     end = begin + n_samples_per_gpu
     padded_ids, lengths = padding_batch_data(d[begin:end], p)
     feed = data["feed_dict"]
     feed[_inpf[concat_name(n, Constants.IDS_NAME)]] = padded_ids
     feed[_inpf[concat_name(n, Constants.LENGTH_NAME)]] = lengths
     return len(lengths)
Ejemplo n.º 2
0
def compute_non_padding_num(input_fields, name_prefix):
    """ Counts the non-padding tokens and the total tokens of a batch.

    The non-padding count is the sum over the length entry; the total
    count is the product of the first two dimensions of the ids entry's
    dynamic shape.

    Args:
        input_fields: A dict of placeholders.
        name_prefix: The key prefix name, Constants.FEATURE_NAME_PREFIX
          or Constants.LABEL_NAME_PREFIX

    Returns: A tuple (non-padding tokens num, total tokens num)
    """
    seq_lengths = input_fields[
        concat_name(name_prefix, Constants.LENGTH_NAME)]
    token_ids = input_fields[
        concat_name(name_prefix, Constants.IDS_NAME)]
    n_nonpadding = tf.reduce_sum(seq_lengths)
    ids_shape = tf.shape(token_ids)
    # Only the first two dimensions of the ids entry are multiplied.
    n_total = ids_shape[0] * ids_shape[1]
    return n_nonpadding, n_total
Ejemplo n.º 3
0
def compute_non_padding_num(input_fields, name_prefix):
    """ Counts the non-padding tokens and the total tokens of a batch.

    The non-padding count is the sum over the length entry; the total
    count is the product of the first two dimensions of the ids entry's
    dynamic shape.

    Args:
        input_fields: A dict of placeholders.
        name_prefix: The key prefix name, Constants.FEATURE_NAME_PREFIX
          or Constants.LABEL_NAME_PREFIX

    Returns: A tuple (non-padding tokens num, total tokens num)
    """
    seq_lengths = input_fields[
        concat_name(name_prefix, Constants.LENGTH_NAME)]
    token_ids = input_fields[
        concat_name(name_prefix, Constants.IDS_NAME)]
    n_nonpadding = tf.reduce_sum(seq_lengths)
    ids_shape = tf.shape(token_ids)
    # Only the first two dimensions of the ids entry are multiplied.
    n_total = ids_shape[0] * ids_shape[1]
    return n_nonpadding, n_total
Ejemplo n.º 4
0
    def map_fn(n, d, p):
        """Split `d` into one slice per entry of `input_fields` and feed each.

        Stores the raw data under the ids key of `data`, fills
        `data["feed_dict"]` slice by slice via `repeat_n_times`, and records
        the value returned by `repeat_n_times` under
        `data["feed_dict"]["parallels"]`.

        Args:
            n: name prefix
            d: data list
            p: padding symbol
        """
        data[concat_name(n, Constants.IDS_NAME)] = d
        n_samples = len(d)
        # One slice per entry of `input_fields` (presumably one entry per
        # device -- confirm against caller).
        n_devices = len(input_fields)
        # Ceiling division so that every sample lands in some slice.
        n_samples_per_gpu, leftover = divmod(n_samples, n_devices)
        if leftover:
            n_samples_per_gpu += 1

        def _feed_batchs(_start_idx, _inpf):
            """Feed slice `_start_idx` via `_inpf`; return how many were fed."""
            begin = _start_idx * n_samples_per_gpu
            if begin >= n_samples:
                # Earlier slices already consumed all samples.
                return 0
            end = begin + n_samples_per_gpu
            padded_ids, lengths = padding_batch_data(d[begin:end], p)
            feed = data["feed_dict"]
            feed[_inpf[concat_name(n, Constants.IDS_NAME)]] = padded_ids
            feed[_inpf[concat_name(n, Constants.LENGTH_NAME)]] = lengths
            return len(lengths)

        slice_indices = list(range(n_devices))
        parallels = repeat_n_times(
            n_devices, _feed_batchs, slice_indices, input_fields)
        data["feed_dict"]["parallels"] = parallels
Ejemplo n.º 5
0
 def map_fn(n, d, p):
     """Pad `d` and register ids and lengths in `data["feed_dict"]`.

     The unpadded data `d` is also stored in `data` under the ids key.

     Args:
         n: Name prefix passed to `concat_name` to build the keys.
         d: Data handed to `padding_batch_data`.
         p: Second argument of `padding_batch_data` (presumably the
           padding symbol -- confirm against caller).
     """
     padded_ids, lengths = padding_batch_data(d, p)
     ids_key = concat_name(n, Constants.IDS_NAME)
     data[ids_key] = d
     feed = data["feed_dict"]
     feed[input_fields[ids_key]] = padded_ids
     feed[input_fields[concat_name(n, Constants.LENGTH_NAME)]] = lengths