Example #1
  def __new__(cls,
              iterations_per_loop=2,
              num_shards=2,
              per_host_input_for_training=True):

    # Check iterations_per_loop.
    util_lib.check_positive_integer(iterations_per_loop,
                                    'TPUConfig iterations_per_loop')

    # Check num_shards.
    util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')

    return super(TPUConfig, cls).__new__(
        cls,
        iterations_per_loop=iterations_per_loop,
        num_shards=num_shards,
        per_host_input_for_training=per_host_input_for_training)
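
The snippet above leans on a util_lib.check_positive_integer helper that is not shown here. A minimal sketch of such a validator, assuming it only needs to reject non-integers and non-positive values (the helper name and error wording follow the call sites above, not a verified implementation):

def check_positive_integer(value, name):
  """Raises ValueError if `value` is not a positive integer."""
  # bool is a subclass of int, so reject it explicitly.
  if isinstance(value, bool) or not isinstance(value, int):
    raise ValueError('{} must be an int, got {!r}'.format(name, value))
  if value <= 0:
    raise ValueError('{} must be positive, got {}'.format(name, value))


# Mirrors the calls in the example above.
check_positive_integer(2, 'TPUConfig iterations_per_loop')   # passes silently
# check_positive_integer(0, 'TPUConfig num_shards')          # would raise ValueError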
Example #2
  def __new__(cls,
              iterations_per_loop=2,
              num_shards=2,
              per_host_input_for_training=False):

    # Check iterations_per_loop.
    util_lib.check_positive_integer(iterations_per_loop,
                                    'TPUConfig iterations_per_loop')

    # Check num_shards.
    util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')

    return super(TPUConfig, cls).__new__(
        cls,
        iterations_per_loop=iterations_per_loop,
        num_shards=num_shards,
        per_host_input_for_training=per_host_input_for_training)
Example #3
  def __new__(cls,
              iterations_per_loop=2,
              num_shards=2,
              per_host_input_for_training=True,
              tpu_job_name=None,
              initial_infeed_sleep_secs=None):

    # Check iterations_per_loop.
    util_lib.check_positive_integer(iterations_per_loop,
                                    'TPUConfig iterations_per_loop')

    # Check num_shards.
    util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')

    # Check initial_infeed_sleep_secs.
    if initial_infeed_sleep_secs:
      util_lib.check_positive_integer(initial_infeed_sleep_secs,
                                      'TPUConfig initial_infeed_sleep_secs')

    tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config()

    return super(TPUConfig, cls).__new__(
        cls,
        iterations_per_loop=iterations_per_loop,
        num_shards=num_shards,
        per_host_input_for_training=per_host_input_for_training,
        tpu_job_name=tpu_job_name,
        initial_infeed_sleep_secs=initial_infeed_sleep_secs)
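
Example #3 introduces a fallback that derives the TPU job name from the environment when tpu_job_name is not passed. The _get_tpu_job_name_from_tf_config helper is not shown; a plausible sketch, assuming the name is read from the standard TF_CONFIG environment variable (the JSON keys 'service' and 'tpu_worker_job_name' are assumptions):

import json
import os


def _get_tpu_job_name_from_tf_config():
  """Returns the TPU worker job name from TF_CONFIG, or None if unset."""
  tf_config = json.loads(os.environ.get('TF_CONFIG', '{}'))
  # Assumed layout: {"service": {"tpu_worker_job_name": "tpu_worker"}, ...}
  return tf_config.get('service', {}).get('tpu_worker_job_name')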
Example #4
    def __new__(cls,
                iterations_per_loop=2,
                num_shards=2,
                per_host_input_for_training=True,
                tpu_job_name=None,
                initial_infeed_sleep_secs=None):

        # Check iterations_per_loop.
        util_lib.check_positive_integer(iterations_per_loop,
                                        'TPUConfig iterations_per_loop')

        # Check num_shards.
        util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')

        # Check initial_infeed_sleep_secs.
        if initial_infeed_sleep_secs:
            util_lib.check_positive_integer(
                initial_infeed_sleep_secs,
                'TPUConfig initial_infeed_sleep_secs')

        tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config()

        return super(TPUConfig, cls).__new__(
            cls,
            iterations_per_loop=iterations_per_loop,
            num_shards=num_shards,
            per_host_input_for_training=per_host_input_for_training,
            tpu_job_name=tpu_job_name,
            initial_infeed_sleep_secs=initial_infeed_sleep_secs)
Example #5
  def __new__(cls,
              iterations_per_loop=2,
              num_shards=2,
              computation_shape=None,
              per_host_input_for_training=True,
              tpu_job_name=None,
              initial_infeed_sleep_secs=None):

    # Check iterations_per_loop.
    util_lib.check_positive_integer(iterations_per_loop,
                                    'TPUConfig iterations_per_loop')

    # Check num_shards.
    util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')

    # Check computation_shape
    if computation_shape is not None and len(computation_shape) != 3:
      raise ValueError(
          'computation_shape must be a list with length 3 or None; got {}'.
          format(str(computation_shape)))

    if computation_shape is not None:
      computation_shape_array = np.asarray(computation_shape, dtype=np.int32)
      # This prevents any computation being replicated across multiple hosts, so
      # that each host feeds the same number of computations.
      if any(computation_shape_array < 1) or any(computation_shape_array > 2):
        raise ValueError('computation_shape elements can only be 1 or 2; got '
                         'computation_shape={}'.format(computation_shape))
      max_replicas_per_host = (
          _NUM_CORES_PER_HOST // np.prod(computation_shape_array))
      if num_shards > max_replicas_per_host and (
          num_shards % max_replicas_per_host != 0):
        raise ValueError(
            '{0} shards can not be evenly distributed across'
            ' multiple hosts. Each shard needs {1} cores and each'
            ' host has {2} cores. Thus {0} shards needs {3} hosts.'
            ' Please adjust num shards so that num_shards is'
            ' divisible by {4} or <= {4}.'.format(
                num_shards, np.prod(computation_shape), _NUM_CORES_PER_HOST,
                num_shards / max_replicas_per_host, max_replicas_per_host))

    # Check initial_infeed_sleep_secs.
    if initial_infeed_sleep_secs:
      util_lib.check_positive_integer(initial_infeed_sleep_secs,
                                      'TPUConfig initial_infeed_sleep_secs')

    tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config()

    return super(TPUConfig, cls).__new__(
        cls,
        iterations_per_loop=iterations_per_loop,
        num_shards=num_shards,
        computation_shape=computation_shape,
        per_host_input_for_training=per_host_input_for_training,
        tpu_job_name=tpu_job_name,
        initial_infeed_sleep_secs=initial_infeed_sleep_secs)
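
The host-distribution check in Example #5 is easier to follow with concrete numbers. Assuming _NUM_CORES_PER_HOST is 8 (the constant is not defined in the snippet), a computation_shape of [1, 1, 2] consumes 2 cores per replica, so at most 4 replicas fit on one host, and any larger num_shards must be a multiple of 4:

import numpy as np

_NUM_CORES_PER_HOST = 8  # assumed value; the constant is not defined in the snippet

computation_shape_array = np.asarray([1, 1, 2], dtype=np.int32)
cores_per_replica = np.prod(computation_shape_array)               # 2
max_replicas_per_host = _NUM_CORES_PER_HOST // cores_per_replica   # 4

for num_shards in (3, 4, 6, 8):
  rejected = (num_shards > max_replicas_per_host
              and num_shards % max_replicas_per_host != 0)
  print(num_shards, 'rejected' if rejected else 'ok')
# 3 ok, 4 ok, 6 rejected (would need 1.5 hosts), 8 ok (exactly 2 hosts)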
Example #6
    def __new__(cls,
                iterations_per_loop=2,
                num_shards=None,
                computation_shape=None,
                per_host_input_for_training=True,
                tpu_job_name=None,
                initial_infeed_sleep_secs=None):

        # Check iterations_per_loop.
        util_lib.check_positive_integer(iterations_per_loop,
                                        'TPUConfig iterations_per_loop')

        # Check num_shards.
        if num_shards is not None:
            util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')

        # Check computation_shape
        if computation_shape is not None and len(computation_shape) != 3:
            raise ValueError(
                'computation_shape must be a list with length 3 or None; got {}'
                .format(str(computation_shape)))

        if computation_shape is not None:
            computation_shape_array = np.asarray(computation_shape,
                                                 dtype=np.int32)
            # This prevents any computation being replicated across multiple hosts, so
            # that each host feeds the same number of computations.
            if any(computation_shape_array < 1) or any(
                    computation_shape_array > 2):
                raise ValueError(
                    'computation_shape elements can only be 1 or 2; got '
                    'computation_shape={}'.format(computation_shape))

        # per_host_input_for_training may be True, False, or integer in [1..3].
        # Map legacy values (True, False) to numeric values.
        if per_host_input_for_training is False:
            per_host_input_for_training = InputPipelineConfig.PER_SHARD_V1
        elif per_host_input_for_training is True:
            per_host_input_for_training = InputPipelineConfig.PER_HOST_V1

        # Check initial_infeed_sleep_secs.
        if initial_infeed_sleep_secs:
            util_lib.check_positive_integer(
                initial_infeed_sleep_secs,
                'TPUConfig initial_infeed_sleep_secs')

        tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config()

        return super(TPUConfig, cls).__new__(
            cls,
            iterations_per_loop=iterations_per_loop,
            num_shards=num_shards,
            computation_shape=computation_shape,
            per_host_input_for_training=per_host_input_for_training,
            tpu_job_name=tpu_job_name,
            initial_infeed_sleep_secs=initial_infeed_sleep_secs)
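
Example #6 starts mapping the legacy boolean values of per_host_input_for_training onto named constants. InputPipelineConfig is not defined in these snippets; a minimal stand-in consistent with the comment that the field "may be True, False, or integer in [1..3]" could look like the following (the specific numeric values and descriptions are assumptions):

class InputPipelineConfig(object):
  """Input modes referenced by the examples (numeric values are assumed)."""
  PER_SHARD_V1 = 1  # legacy per_host_input_for_training=False
  PER_HOST_V1 = 2   # legacy per_host_input_for_training=True
  PER_HOST_V2 = 3   # one input_fn call per host, sharded across its cores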
Example #7
  def __new__(cls,
              iterations_per_loop=2,
              num_shards=None,
              computation_shape=None,
              per_host_input_for_training=True,
              tpu_job_name=None,
              initial_infeed_sleep_secs=None):

    # Check iterations_per_loop.
    util_lib.check_positive_integer(iterations_per_loop,
                                    'TPUConfig iterations_per_loop')

    # Check num_shards.
    if num_shards is not None:
      util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')

    # Check computation_shape
    if computation_shape is not None and len(computation_shape) != 3:
      raise ValueError(
          'computation_shape must be a list with length 3 or None; got {}'.
          format(str(computation_shape)))

    if computation_shape is not None:
      computation_shape_array = np.asarray(computation_shape, dtype=np.int32)
      # This prevents any computation being replicated across multiple hosts, so
      # that each host feeds the same number of computations.
      if any(computation_shape_array < 1) or any(computation_shape_array > 2):
        raise ValueError('computation_shape elements can only be 1 or 2; got '
                         'computation_shape={}'.format(computation_shape))

    # per_host_input_for_training may be True, False, or integer in [1..3].
    # Map legacy values (True, False) to numeric values.
    if per_host_input_for_training is False:
      per_host_input_for_training = InputPipelineConfig.PER_SHARD_V1
    elif per_host_input_for_training is True:
      per_host_input_for_training = InputPipelineConfig.PER_HOST_V1

    # Check initial_infeed_sleep_secs.
    if initial_infeed_sleep_secs:
      util_lib.check_positive_integer(initial_infeed_sleep_secs,
                                      'TPUConfig initial_infeed_sleep_secs')

    tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config()

    return super(TPUConfig, cls).__new__(
        cls,
        iterations_per_loop=iterations_per_loop,
        num_shards=num_shards,
        computation_shape=computation_shape,
        per_host_input_for_training=per_host_input_for_training,
        tpu_job_name=tpu_job_name,
        initial_infeed_sleep_secs=initial_infeed_sleep_secs)
Example #8
    def __new__(cls,
                iterations_per_loop=2,
                num_shards=None,
                num_cores_per_replica=None,
                per_host_input_for_training=True,
                tpu_job_name=None,
                initial_infeed_sleep_secs=None):

        # Check iterations_per_loop.
        util_lib.check_positive_integer(iterations_per_loop,
                                        'TPUConfig iterations_per_loop')

        # Check num_shards.
        if num_shards is not None:
            util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')

        # Check num_cores_per_replica
        if num_cores_per_replica is not None:
            if num_cores_per_replica not in [1, 2, 4, 8]:
                raise ValueError(
                    'num_cores_per_replica must be 1, 2, 4, or 8; got {}'.
                    format(str(num_cores_per_replica)))

        # per_host_input_for_training may be True, False, or integer in [1..3].
        # Map legacy values (True, False) to numeric values.
        if per_host_input_for_training is False:
            per_host_input_for_training = InputPipelineConfig.PER_SHARD_V1
        elif per_host_input_for_training is True:
            per_host_input_for_training = InputPipelineConfig.PER_HOST_V1

        # Check initial_infeed_sleep_secs.
        if initial_infeed_sleep_secs:
            util_lib.check_positive_integer(
                initial_infeed_sleep_secs,
                'TPUConfig initial_infeed_sleep_secs')

        tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config()

        return super(TPUConfig, cls).__new__(
            cls,
            iterations_per_loop=iterations_per_loop,
            num_shards=num_shards,
            num_cores_per_replica=num_cores_per_replica,
            per_host_input_for_training=per_host_input_for_training,
            tpu_job_name=tpu_job_name,
            initial_infeed_sleep_secs=initial_infeed_sleep_secs)
Example #9
  def __new__(cls,
              iterations_per_loop=2,
              num_shards=None,
              num_cores_per_replica=None,
              per_host_input_for_training=True,
              tpu_job_name=None,
              initial_infeed_sleep_secs=None):

    # Check iterations_per_loop.
    util_lib.check_positive_integer(iterations_per_loop,
                                    'TPUConfig iterations_per_loop')

    # Check num_shards.
    if num_shards is not None:
      util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')

    # Check num_cores_per_replica
    if num_cores_per_replica is not None:
      if num_cores_per_replica not in [1, 2, 4, 8]:
        raise ValueError(
            'num_cores_per_replica must be 1, 2, 4, or 8; got {}'.format(
                str(num_cores_per_replica)))

    # per_host_input_for_training may be True, False, or integer in [1..3].
    # Map legacy values (True, False) to numeric values.
    if per_host_input_for_training is False:
      per_host_input_for_training = InputPipelineConfig.PER_SHARD_V1
    elif per_host_input_for_training is True:
      per_host_input_for_training = InputPipelineConfig.PER_HOST_V1

    # Check initial_infeed_sleep_secs.
    if initial_infeed_sleep_secs:
      util_lib.check_positive_integer(initial_infeed_sleep_secs,
                                      'TPUConfig initial_infeed_sleep_secs')

    tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config()

    return super(TPUConfig, cls).__new__(
        cls,
        iterations_per_loop=iterations_per_loop,
        num_shards=num_shards,
        num_cores_per_replica=num_cores_per_replica,
        per_host_input_for_training=per_host_input_for_training,
        tpu_job_name=tpu_job_name,
        initial_infeed_sleep_secs=initial_infeed_sleep_secs)
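
Examples #8 and #9 replace the three-element computation_shape with a single num_cores_per_replica. A hypothetical construction that passes the checks above, with purely illustrative argument values:

# Hypothetical usage; assumes the TPUConfig defined above is importable.
config = TPUConfig(
    iterations_per_loop=100,
    num_shards=2,                  # two model-parallel replicas
    num_cores_per_replica=4,       # each replica spans four TPU cores
    per_host_input_for_training=True)  # mapped to InputPipelineConfig.PER_HOST_V1

# TPUConfig(num_cores_per_replica=3) would raise:
#   ValueError: num_cores_per_replica must be 1, 2, 4, or 8; got 3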
Example #10
    def __new__(cls,
                iterations_per_loop=2,
                num_shards=None,
                num_cores_per_replica=None,
                per_host_input_for_training=True,
                tpu_job_name=None,
                initial_infeed_sleep_secs=None,
                input_partition_dims=None):

        # Check iterations_per_loop.
        util_lib.check_positive_integer(iterations_per_loop,
                                        'TPUConfig iterations_per_loop')

        # Check num_shards.
        if num_shards is not None:
            util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')

        if input_partition_dims is not None:
            if len(input_partition_dims) != 1 and len(
                    input_partition_dims) != 2:
                raise ValueError(
                    'input_partition_dims must be a list/tuple with one or two'
                    ' elements.')

            if per_host_input_for_training is not InputPipelineConfig.PER_HOST_V2:
                raise ValueError(
                    'input_partition_dims is only supported in PER_HOST_V2 mode.'
                )

            if num_cores_per_replica is None:
                raise ValueError(
                    'input_partition_dims requires setting num_cores_per_replica.'
                )

        # Check num_cores_per_replica
        if num_cores_per_replica is not None:
            if num_cores_per_replica not in [1, 2, 4, 8]:
                raise ValueError(
                    'num_cores_per_replica must be 1, 2, 4, or 8; got {}'.
                    format(str(num_cores_per_replica)))

        # per_host_input_for_training may be True, False, or integer in [1..3].
        # Map legacy values (True, False) to numeric values.
        if per_host_input_for_training is False:
            per_host_input_for_training = InputPipelineConfig.PER_SHARD_V1
        elif per_host_input_for_training is True:
            per_host_input_for_training = InputPipelineConfig.PER_HOST_V1

        # Check initial_infeed_sleep_secs.
        if initial_infeed_sleep_secs:
            util_lib.check_positive_integer(
                initial_infeed_sleep_secs,
                'TPUConfig initial_infeed_sleep_secs')

        tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config()

        return super(TPUConfig, cls).__new__(
            cls,
            iterations_per_loop=iterations_per_loop,
            num_shards=num_shards,
            num_cores_per_replica=num_cores_per_replica,
            per_host_input_for_training=per_host_input_for_training,
            tpu_job_name=tpu_job_name,
            initial_infeed_sleep_secs=initial_infeed_sleep_secs,
            input_partition_dims=input_partition_dims)
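
Example #10 adds spatial input partitioning, and the three checks above mean input_partition_dims is only accepted together with PER_HOST_V2 input mode and an explicit num_cores_per_replica. A hypothetical configuration that satisfies all three (the partition dims are illustrative and assume a rank-4 feature tensor):

# Hypothetical usage; partitions each feature tensor 2x2 spatially across the
# four cores of a replica, and leaves labels unpartitioned.
config = TPUConfig(
    iterations_per_loop=100,
    num_cores_per_replica=4,                                   # required with input_partition_dims
    per_host_input_for_training=InputPipelineConfig.PER_HOST_V2,
    input_partition_dims=[[1, 2, 2, 1], None])                 # [feature dims, label dims]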
Example #11
    def __new__(cls,
                iterations_per_loop=2,
                num_shards=2,
                computation_shape=None,
                per_host_input_for_training=True,
                tpu_job_name=None,
                initial_infeed_sleep_secs=None):

        # Check iterations_per_loop.
        util_lib.check_positive_integer(iterations_per_loop,
                                        'TPUConfig iterations_per_loop')

        # Check num_shards.
        util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')

        # Check computation_shape
        if computation_shape is not None and len(computation_shape) != 3:
            raise ValueError(
                'computation_shape must be a list with length 3 or None; got {}'
                .format(str(computation_shape)))

        if computation_shape is not None:
            computation_shape_array = np.asarray(computation_shape,
                                                 dtype=np.int32)
            # This prevents any computation being replicated across multiple hosts, so
            # that each host feeds the same number of computations.
            if any(computation_shape_array < 1) or any(
                    computation_shape_array > 2):
                raise ValueError(
                    'computation_shape elements can only be 1 or 2; got '
                    'computation_shape={}'.format(computation_shape))
            max_replicas_per_host = (_NUM_CORES_PER_HOST //
                                     np.prod(computation_shape_array))
            if num_shards > max_replicas_per_host and (
                    num_shards % max_replicas_per_host != 0):
                raise ValueError(
                    '{0} shards can not be evenly distributed across'
                    ' multiple hosts. Each shard needs {1} cores and each'
                    ' host has {2} cores. Thus {0} shards needs {3} hosts.'
                    ' Please adjust num shards so that num_shards is'
                    ' divisible by {4} or <= {4}.'.format(
                        num_shards, np.prod(computation_shape),
                        _NUM_CORES_PER_HOST,
                        num_shards / max_replicas_per_host,
                        max_replicas_per_host))

        # Check initial_infeed_sleep_secs.
        if initial_infeed_sleep_secs:
            util_lib.check_positive_integer(
                initial_infeed_sleep_secs,
                'TPUConfig initial_infeed_sleep_secs')

        tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config()

        return super(TPUConfig, cls).__new__(
            cls,
            iterations_per_loop=iterations_per_loop,
            num_shards=num_shards,
            computation_shape=computation_shape,
            per_host_input_for_training=per_host_input_for_training,
            tpu_job_name=tpu_job_name,
            initial_infeed_sleep_secs=initial_infeed_sleep_secs)
Example #12
  def __new__(cls,
              iterations_per_loop=2,
              num_shards=None,
              num_cores_per_replica=None,
              per_host_input_for_training=True,
              tpu_job_name=None,
              initial_infeed_sleep_secs=None,
              input_partition_dims=None):

    # Check iterations_per_loop.
    util_lib.check_positive_integer(iterations_per_loop,
                                    'TPUConfig iterations_per_loop')

    # Check num_shards.
    if num_shards is not None:
      util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')

    if input_partition_dims is not None:
      if len(input_partition_dims) != 1 and len(input_partition_dims) != 2:
        raise ValueError(
            'input_partition_dims must be a list/tuple with one or two'
            ' elements.')

      if per_host_input_for_training is not InputPipelineConfig.PER_HOST_V2:
        raise ValueError(
            'input_partition_dims is only supported in PER_HOST_V2 mode.')

      if num_cores_per_replica is None:
        raise ValueError(
            'input_partition_dims requires setting num_cores_per_replica.')

    # Check num_cores_per_replica
    if num_cores_per_replica is not None:
      if num_cores_per_replica not in [1, 2, 4, 8]:
        raise ValueError(
            'num_cores_per_replica must be 1, 2, 4, or 8; got {}'.format(
                str(num_cores_per_replica)))

    # per_host_input_for_training may be True, False, or integer in [1..3].
    # Map legacy values (True, False) to numeric values.
    if per_host_input_for_training is False:
      per_host_input_for_training = InputPipelineConfig.PER_SHARD_V1
    elif per_host_input_for_training is True:
      per_host_input_for_training = InputPipelineConfig.PER_HOST_V1

    # Check initial_infeed_sleep_secs.
    if initial_infeed_sleep_secs:
      util_lib.check_positive_integer(initial_infeed_sleep_secs,
                                      'TPUConfig initial_infeed_sleep_secs')

    tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config()

    return super(TPUConfig, cls).__new__(
        cls,
        iterations_per_loop=iterations_per_loop,
        num_shards=num_shards,
        num_cores_per_replica=num_cores_per_replica,
        per_host_input_for_training=per_host_input_for_training,
        tpu_job_name=tpu_job_name,
        initial_infeed_sleep_secs=initial_infeed_sleep_secs,
        input_partition_dims=input_partition_dims)
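
Taken together, these examples trace successive revisions of what appears to be the TPUConfig namedtuple from TensorFlow's TPUEstimator API. Under that assumption, the finished config is usually consumed through a RunConfig; a hedged sketch of the TF 1.x-style wiring (the class paths, arguments, and placeholder model_fn below are assumptions based on that API, not part of the snippets above):

import tensorflow as tf  # TF 1.x, where tf.contrib.tpu is available (assumed)


def my_model_fn(features, labels, mode, params):
  # Placeholder; a real model_fn would build the graph and return a
  # tf.contrib.tpu.TPUEstimatorSpec.
  raise NotImplementedError


tpu_config = tf.contrib.tpu.TPUConfig(
    iterations_per_loop=100,
    num_shards=8,
    per_host_input_for_training=True)

run_config = tf.contrib.tpu.RunConfig(
    model_dir='/tmp/tpu_model',   # illustrative path
    tpu_config=tpu_config)

estimator = tf.contrib.tpu.TPUEstimator(
    model_fn=my_model_fn,
    config=run_config,
    train_batch_size=1024)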