Example #1
    def __init__(self, params):
        super(BatchNormLayer, self).__init__(params)
        p = self.params
        assert p.name

        pc = py_utils.WeightParams(
            shape=[p.dim],
            init=py_utils.WeightInit.Constant(0.0),
            dtype=p.dtype,
            collections=[self.__class__.__name__ + '_vars'])

        with tf.variable_scope(p.name):
            if not p.use_moving_avg_in_training:
                self.CreateVariable('beta', pc)
                # Note: the real gamma to use is 1 + gamma.
                self.CreateVariable('gamma', pc, lambda x: 1.0 + x)

            # Two statistics.
            _, self._moving_mean = py_utils.CreateVariable('moving_mean',
                                                           pc,
                                                           trainable=False)

            pc = py_utils.WeightParams(
                shape=[p.dim],
                init=py_utils.WeightInit.Constant(1.0),
                dtype=p.dtype,
                collections=[self.__class__.__name__ + '_vars'])
            _, self._moving_variance = py_utils.CreateVariable(
                'moving_variance', pc, trainable=False)
        self._epsilon = 0.001
        self._decay = p.decay
Example #2
  def testRenamingRules(self):
    pc = py_utils.WeightParams([3, 3])
    with tf.variable_scope('model'):
      _, v1 = py_utils.CreateVariable('v1', pc)
      with py_utils.VariableRenameScope([('model/(.*)', 'data/%s')]):
        _, v2 = py_utils.CreateVariable('v2', pc)
      _, v3 = py_utils.CreateVariable('v3', pc)

    self.assertTrue(v1.name == 'model/v1/var:0')
    self.assertTrue(v2.name == 'data/v2/var:0')
    self.assertTrue(v3.name == 'model/v3/var:0')
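As the assertions above suggest, each rename rule is a (regex, substitution) pair: the regex is matched against the variable's scoped name and the '%s' in the substitution receives the captured group. A minimal sketch of stacking two such rules (the scope and variable names below are hypothetical, not taken from the examples):

  with tf.variable_scope('model'):
    with py_utils.VariableRenameScope([('model/enc/(.*)', 'encoder/%s'),
                                       ('model/dec/(.*)', 'decoder/%s')]):
      with tf.variable_scope('enc'):
        _, w = py_utils.CreateVariable('w', py_utils.WeightParams([3, 3]))
  # Under these rules, w.name would be expected to be 'encoder/w/var:0'.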
Example #3
 def testNoConsting(self):
     with inference_graph_exporter.ConstGuaranteeScope():
         wp = py_utils.WeightParams(shape=[1],
                                    init=py_utils.WeightInit.Constant(0.0),
                                    dtype=tf.float32,
                                    collections=['v'])
         v = py_utils.CreateVariable('v', wp)
         self.assertEqual(tf.Tensor, type(v))
         with inference_graph_exporter.NoConstGuaranteeScope():
             v = py_utils.CreateVariable('v', wp, reuse=True)
             self.assertIsInstance(v, tf.Variable)
Example #4
  def testCreateVariableDifferentSeed(self):
    with self.session(use_gpu=False) as sess:
      tf.set_random_seed(3251343)
      pc = py_utils.WeightParams([2, 3], py_utils.WeightInit.Gaussian())
      with tf.variable_scope('layer0'):
        w0, _ = py_utils.CreateVariable('w', pc)
      with tf.variable_scope('layer1'):
        w1, _ = py_utils.CreateVariable('w', pc)
      sess.run(tf.global_variables_initializer())

      # w0_val and w1_val should be sufficiently different.
      w0_val, w1_val = sess.run([w0, w1])
      print(['diff = ', w0_val - w1_val])
      self.assertTrue(np.max(np.abs(w0_val - w1_val)) > 0.1)
Example #5
 def __init__(self, params):
     super(LinearModel, self).__init__(params)
     p = self.params
     with tf.variable_scope(p.name):
         w = py_utils.WeightParams(shape=[3],
                                   init=py_utils.WeightInit.Gaussian(
                                       scale=1.0, seed=123456),
                                   dtype=p.dtype)
         b = py_utils.WeightParams(shape=[],
                                   init=py_utils.WeightInit.Gaussian(
                                       scale=1.0, seed=234567),
                                   dtype=p.dtype)
         self._w, _ = py_utils.CreateVariable('w', w)
         self._b, _ = py_utils.CreateVariable('b', b)
Example #6
  def testCreateLocalTheta(self):
    methods = [py_utils.WeightInit.Gaussian, py_utils.WeightInit.Uniform]
    dtypes = [tf.float32, tf.complex64]
    shapes = [[2, 4], [3]]

    test_vars = py_utils.NestedMap()
    for i, (m, dt, sp) in enumerate(itertools.product(methods, dtypes, shapes)):
      pc = py_utils.WeightParams(sp, m(), dt, 'col1')
      test_vars['var_%d' % i] = py_utils.CreateVariable('var_%d' % i, pc)[0]

    test_devices = [
        '/job:worker/replica:0/device:GPU:0',
        '/job:worker/replica:0/device:GPU:1'
    ]

    sharded_local_vars = py_utils.CreateLocalTheta(test_vars, test_devices)
    sharded_local_vars_list = sharded_local_vars.Flatten()

    # assert the name is now Identity*
    for v in sharded_local_vars_list:
      self.assertTrue('Identity' in v.name)

    # assert proper device placement
    for i, v in enumerate(sharded_local_vars_list):
      expected_device = test_devices[i % len(test_devices)]
      self.assertEqual(v.device, expected_device)
Example #7
  def testCreateVariableNormal(self):
    with self.session(use_gpu=False, graph=tf.Graph()):
      tf.set_random_seed(832124)
      methods = [
          py_utils.WeightInit.Gaussian,
          py_utils.WeightInit.GaussianSqrtDim,
      ]
      dtypes = [tf.float32, tf.complex64]
      shapes = [[2, 3]]
      all_vars = []
      for i, (dt, m, sp) in enumerate(
          itertools.product(dtypes, methods, shapes)):
        pc = py_utils.WeightParams(sp, m(), dt)
        all_vars.append(py_utils.CreateVariable('var_%d' % i, pc)[0])

      v1_v_expted = [[-1.472208, 0.960204, -0.192588],
                     [-0.461884, 1.018134, 0.063719]]
      v2_v_expted = [[-0.862255, -0.688153, 0.82515],
                     [-0.07671, 0.613031, -0.020327]]
      v3_v_expted = [
          [1.005469 + 0.827639j, 1.249896 + 0.802671j, -0.026286 - 0.813836j],
          [0.865386 + 0.301172j, 0.876698 - 0.907293j, 1.996337 + 1.840192j],
      ]

      tf.global_variables_initializer().run()
      v1_v = all_vars[0].eval()
      v2_v = all_vars[1].eval()
      v3_v = all_vars[2].eval()
      self.assertAllClose(v1_v_expted, v1_v.tolist())
      self.assertAllClose(v2_v_expted, v2_v.tolist())
      self.assertAllClose(v3_v_expted, v3_v.tolist())
Example #8
    def CreateVariable(self, name, var_params, theta_fn=None, *args, **kwargs):
        """Create a variable of this layer according to the parameter `var_params`.

    E.g.::

        def __init__(self, ...):    # A layer's constructor
          self.CreateVariable(
              'weight', py_utils.WeightParams(shape=[100, 100]))

    `theta_fn` is used to apply a simple transformation on the created
    variable's value before it is used by the forward computation. E.g., to
    add the global variational noise according to this layer's
    parameter, one can do::

        def __init__(self, ...):    # A layer's constructor
          self.CreateVariable(
            name='weight',
            var_params=py_utils.WeightParams(shape=[100, 100]),
            theta_fn=self.AddGlobalVN)

    Args:
      name: Variable name which is used as the key into vars/theta.
      var_params: `Params` used to create the variable.
      theta_fn: A python function that takes a variable's value and returns a
        new value to be used later for computation. Its signature must be
        (tf.Tensor) -> (tf.Tensor).
      *args: List of args passed to `.py_utils.CreateVariable`.
      **kwargs: Keyword args passed to `.py_utils.CreateVariable`.
    """
        self._CheckName(name)
        value, var = py_utils.CreateVariable(name, var_params, *args, **kwargs)
        self._private_vars[name] = var
        if theta_fn is not None:
            value = theta_fn(value)
        self._private_theta[name] = value
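Beyond the self.AddGlobalVN example in the docstring, theta_fn can be any (tf.Tensor) -> (tf.Tensor) callable; only the value stored in theta is transformed, while the raw variable remains in vars. A minimal sketch with a hypothetical layer and an arbitrary scaling factor, assuming the same BaseLayer API as above:

    def __init__(self, params):    # hypothetical layer's constructor
      super(MyScaledLayer, self).__init__(params)
      p = self.params
      with tf.variable_scope(p.name):
        self.CreateVariable(
            'weight',
            py_utils.WeightParams(shape=[100, 100],
                                  init=py_utils.WeightInit.Gaussian(),
                                  dtype=p.dtype),
            # Only theta['weight'] is scaled; vars['weight'] keeps the
            # untouched tf.Variable.
            theta_fn=lambda x: 0.5 * x)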
Example #9
    def _CreateVariableInternal(self, name, meta):
        """Immediately creates the variable described by `meta`.

    DO NOT OVERRIDE. For internal use only. Subclasses of BaseLayer should use
    self.CreateVariable() to create variables.

    Args:
      name: The variable name.
      meta: A CreateVariableMeta describing the variable to be created.
    """
        meta.kwargs.setdefault('default_seed', self.params.random_seed)
        var = py_utils.CreateVariable(name, meta.var_params, **meta.kwargs)
        self._private_vars[name] = var
        if self.cluster.params.worker.gpus_per_replica > 0:
            # On GPU (which always trains a single step per session.run()), reference
            # a tensor in FProp to cache it on device and avoid extraneous sends from
            # reading variables from ps multiple times.
            with tf.device(var.device):
                value = tf.identity(var)
        else:
            # Pass the resource variable directly into the training loop.
            value = var
        if meta.theta_fn is not None:
            value = meta.theta_fn(value)
        self._private_theta[name] = value
Example #10
  def testCreateVariableUniform(self):
    with self.session(use_gpu=False, graph=tf.Graph()):
      tf.set_random_seed(12345678)
      methods = [
          py_utils.WeightInit.Uniform,
          py_utils.WeightInit.UniformSqrtDim,
          py_utils.WeightInit.UniformUnitScaling,
      ]
      dtypes = [tf.float32, tf.complex64]
      shapes = [[2, 3]]
      all_vars = []
      for i, (dt, m, sp) in enumerate(
          itertools.product(dtypes, methods, shapes)):
        pc = py_utils.WeightParams(sp, m(0.1), dt)
        all_vars.append(py_utils.CreateVariable('var_%d' % i, pc)[0])

      v1_v_expted = [[0.069674, -0.072278, -0.021777],
                     [-0.052155, -0.050274, 0.086218]]
      v2_v_expted = [[0.005361, 0.036109, -0.036575],
                     [0.058314, 0.031438, 0.049196]]
      v4_v_expted = [
          [0.015448 + 0.068295j, -0.098710 - 0.054435j, 0.037030 - 0.048017j],
          [-0.047435 + 0.035301j, 0.041994 + 0.000279j, -0.029097 + 0.084902j],
      ]

      tf.global_variables_initializer().run()
      v1_v = all_vars[0].eval()
      v2_v = all_vars[1].eval()
      v4_v = all_vars[3].eval()
      self.assertAllClose(v1_v_expted, v1_v.tolist())
      self.assertAllClose(v2_v_expted, v2_v.tolist())
      self.assertAllClose(v4_v_expted, v4_v.tolist())
Example #11
  def testCreateVariableException(self):
    with self.session(use_gpu=False, graph=tf.Graph()):
      tf.set_random_seed(832124)
      pc = py_utils.WeightParams([2, 3], py_utils.WeightInit.Gaussian())
      var1 = py_utils.CreateVariable('var1', pc)[0]

      tf.get_variable_scope().reuse_variables()
      # Reuses an existing variable.
      var2 = py_utils.CreateVariable('var1', pc)[0]

      # An exception should be thrown in this case.
      pc = py_utils.WeightParams([2, 3], py_utils.WeightInit.Gaussian(2.0))
      with self.assertRaises(AssertionError):
        py_utils.CreateVariable('var1', pc)

      tf.global_variables_initializer().run()
      self.assertAllEqual(var1.eval(), var2.eval())
Example #12
 def __init__(self, name):
   self._name = name
   _, self._var = py_utils.CreateVariable(
       name=name,
       params=py_utils.WeightParams([], py_utils.WeightInit.Constant(0),
                                    tf.int64),
       trainable=False)
   self._value = self._var.value() + 0  # Makes a copy.
Example #13
 def _CreateQStateVar(self, t_name, suffix, params):
   name = t_name + '_' + suffix
   assert name not in self._qvars, 'QState var already exists: %s' % name
   var_name = self._qvars_scope.name + '/' + name
   with tf.variable_scope(py_utils.GetGlobalVariableScope()):
     v = py_utils.CreateVariable(var_name, params, trainable=False)
   self._qvars[name] = v
   return v
Example #14
    def __init__(self, params):
        super(MergerLayer, self).__init__(params)
        p = self.params
        if not p.name:
            raise ValueError('Layer must have a specified name!')
        if p.merger_op not in set(self.MERGER_OPS):
            raise ValueError('Merger op must be one of: ', self.MERGER_OPS)

        if p.merger_op == 'atten':
            atten_params = p.attention_tpl.Copy()
            atten_params.source_dim = p.source_dim
            atten_params.query_dim = p.query_dim
            atten_params.hidden_dim = p.hidden_dim
            atten_params.dtype = p.dtype
            if atten_params.params_init is None:
                atten_params.params_init = py_utils.WeightInit.Gaussian(
                    1. / math.sqrt(atten_params.source_dim +
                                   atten_params.query_dim))
            self.CreateChild('atten', atten_params)

        if p.pre_proj_input_dims:
            if not p.pre_proj_output_dim:
                raise ValueError(
                    'Output dim should be specified for projection.')
            pre_proj_params = []
            for i, pre_proj_dim in enumerate(p.pre_proj_input_dims):
                proj_p = p.proj_tpl.Copy()
                proj_p.name = 'merger_pre_proj_%d' % i
                proj_p.input_dim = pre_proj_dim
                proj_p.output_dim = p.pre_proj_output_dim
                pre_proj_params.append(proj_p)
            self.CreateChildren('pre_proj', pre_proj_params)

        if p.merger_op == 'weighted_sum':
            assert p.num_sources > 0, (
                'For merger_op=weighted_sum, must specify '
                'num_sources > 0.')
            params_init = py_utils.WeightInit.Constant(1.0 / p.num_sources)
            # Weights to be learned.
            pw = py_utils.WeightParams(
                shape=[p.num_sources],
                init=params_init,
                dtype=p.dtype,
                collections=[self.__class__.__name__ + '_vars'])
            with tf.variable_scope(p.name):
                _, self._sum_weight = py_utils.CreateVariable('sum_weight', pw)

        if p.merger_op == 'gated_avg':
            assert p.num_sources > 0, ('For merger_op=gated_avg, must specify '
                                       'num_sources > 0.')
            params = p.gated_avg_tpl.Copy()
            params.name = 'g_avg_merger'
            params.num_nodes = p.source_dim
            params.num_inputs = p.num_sources
            self.CreateChild('gated_average', params)
Example #15
def CreateTaskGlobalStep(params, task_name):
  """Create if needed and return the global_step."""
  with tf.name_scope(None), tf.variable_scope(py_utils.global_variable_scope):
    graph_collections = [tf.GraphKeys.GLOBAL_VARIABLES, 'TASK_GLOBAL_STEP']
    _, v = py_utils.CreateVariable(
        name=task_name + '_global_step',
        params=py_utils.WeightParams([], py_utils.WeightInit.Constant(0),
                                     tf.int64),
        trainable=False,
        collections=graph_collections)
    summary_utils.scalar(params, v.name, v)
    return v
Example #16
  def __init__(self, params):
    super().__init__(params)

    p = self.params

    with tf.variable_scope(p.name):
      wp = py_utils.WeightParams(
          shape=[],
          init=py_utils.WeightInit.Constant(1.0),
          collections=['DevBasedSchedule_vars'],
          dtype=tf.float32)
      self._cur_factor = py_utils.CreateVariable(
          'cur_factor', wp, trainable=False)
      wp = py_utils.WeightParams(
          shape=[],
          init=py_utils.WeightInit.Constant(0),
          collections=['DevBasedSchedule_vars'],
          dtype=tf.int64)
      self._ref_step = py_utils.CreateVariable('ref_step', wp, trainable=False)
      self._metric_history = early_stop.MetricHistory(p.metric_history)
      self._best_step = ops.best_step(self._metric_history.hist_file,
                                      p.tolerance)
Example #17
  def testCreateVariableBasics(self):
    with self.session(use_gpu=False, graph=tf.Graph()):
      methods = [
          py_utils.WeightInit.Gaussian,
          py_utils.WeightInit.Uniform,
          py_utils.WeightInit.Constant,
          py_utils.WeightInit.TruncatedGaussian,
          py_utils.WeightInit.GaussianSqrtDim,
          py_utils.WeightInit.UniformSqrtDim,
          py_utils.WeightInit.UniformUnitScaling,
          py_utils.WeightInit.TruncatedGaussianSqrtDim,
      ]
      dtypes = [tf.float32, tf.float64, tf.complex64]
      shapes = [[], [3], [2, 4]]
      collections = ['col1', 'col2']

      all_vars = []
      for i, (m, dt, sp) in enumerate(
          itertools.product(methods, dtypes, shapes)):
        pc = py_utils.WeightParams(sp, m(), dt, collections)
        all_vars.append(py_utils.CreateVariable('var_%d' % i, pc)[0])

      # To reuse existing variables
      tf.get_variable_scope().reuse_variables()

      self.assertEqual(len(tf.all_variables()), len(all_vars))

      all_vars_copy = []
      for i, (m, dt, sp) in enumerate(
          itertools.product(methods, dtypes, shapes)):
        pc = py_utils.WeightParams(sp, m(), dt, collections)
        all_vars_copy.append(py_utils.CreateVariable('var_%d' % i, pc)[0])

      tf.global_variables_initializer().run()
      for v1, v2 in zip(all_vars, all_vars_copy):
        v1_v = v1.eval()
        v2_v = v2.eval()
        self.assertAllEqual(v1_v, v2_v)
Example #18
    def CreateVariable(self, name: str, var_params: hyperparams.Params,
                       **kwargs) -> None:
        """Create a variable of this layer according to the parameter `var_params`.

    E.g.::

        def __init__(self, ...):    # A layer's constructor
          self.CreateVariable(
              'weight', py_utils.WeightParams(shape=[100, 100]))

    Args:
      name: Variable name which is used as the key into vars/theta.
      var_params: `Params` used to create the variable.
      **kwargs: Keyword args passed to `.py_utils.CreateVariable`.
    """
        kwargs.setdefault('default_seed', self.params.random_seed)
        if self.params.device_mesh is not None:
            if (len([dim for dim in var_params.shape if dim > 1]) > 1
                    and var_params.tensor_split_dims_mapping is None):
                tf.logging.warning(
                    'tensor_split_dims_mapping missing for %s.%s: shape=%s',
                    self.path, name, var_params.shape)
        self._CheckName(name)
        if (self.params.skip_lp_regularization and
                py_utils.SKIP_LP_REGULARIZATION not in var_params.collections):
            var_params = py_utils.WeightParams(
                shape=var_params.shape,
                dtype=var_params.dtype,
                init=var_params.init,
                collections=(var_params.collections +
                             [py_utils.SKIP_LP_REGULARIZATION]))
        self._var_symbolic_shape_map[name] = var_params.shape

        var = py_utils.CreateVariable(name, var_params, **kwargs)
        self._private_vars[name] = var

        if py_utils.IsEagerMode():
            # With eager trainer, always use the variable directly.
            value = var
        else:
            if self.cluster.params.worker.gpus_per_replica > 0:
                # On GPU (which always trains a single step per session.run()),
                # reference a tensor in FProp to cache it on device and avoid extraneous
                # sends from reading variables from ps multiple times.
                with tf.device(var.device):
                    value = tf.identity(var, name=name)
            else:
                value = var

        self._private_theta[name] = value
Example #19
        def _Acc(vg):
            """Updating accumulators."""

            v, g = vg
            with tf.variable_scope(v.op.name):
                _, a = py_utils.CreateVariable(
                    'grad_accumulator',
                    py_utils.WeightParams(v.get_shape(),
                                          py_utils.WeightInit.Constant(0.0),
                                          self.params.dtype),
                    trainable=False)
                a = tf.assign_add(a, g)

            return py_utils.VarGrad(v, a)
Example #20
    def CreateVariable(self, name, var_params, theta_fn=None, *args, **kwargs):
        """Create a variable of this layer according to the parameter `var_params`.

    E.g.::

        def __init__(self, ...):    # A layer's constructor
          self.CreateVariable(
              'weight', py_utils.WeightParams(shape=[100, 100]))

    `theta_fn` is used to apply a simple transformation on the created
    variable's value before it is used by the forward computation. E.g., to
    add the global variational noise according to this layer's
    parameter, one can do::

        def __init__(self, ...):    # A layer's constructor
          self.CreateVariable(
            name='weight',
            var_params=py_utils.WeightParams(shape=[100, 100]),
            theta_fn=self.AddGlobalVN)

    Args:
      name: Variable name which is used as the key into vars/theta.
      var_params: `Params` used to create the variable.
      theta_fn: A python function that takes a variable's value and returns a
        new value to be used later for computation. Its signature must be
        (tf.Tensor) -> (tf.Tensor).
      *args: List of args passed to `.py_utils.CreateVariable`.
      **kwargs: Keyword args passed to `.py_utils.CreateVariable`.
    """
        self._CheckName(name)
        if (self.params.skip_lp_regularization and
                py_utils.SKIP_LP_REGULARIZATION not in var_params.collections):
            var_params = py_utils.WeightParams(
                shape=var_params.shape,
                dtype=var_params.dtype,
                init=var_params.init,
                collections=(var_params.collections +
                             [py_utils.SKIP_LP_REGULARIZATION]))
        self._var_symbolic_shape_map[name] = var_params.shape
        if (var_params.shape
                and any(symbolic.IsExpr(dim) for dim in var_params.shape)):
            var_params.shape = symbolic.EvalExpr(var_params.shape)
        value, var = py_utils.CreateVariable(name, var_params, *args, **kwargs)
        self._private_vars[name] = var
        if theta_fn is not None:
            value = theta_fn(value)
        self._private_theta[name] = value
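This variant also evaluates symbolic dimensions in var_params.shape before the variable is created. A rough sketch of declaring such a shape; the symbol name, its value binding, and the use of symbolic.Symbol are assumptions, not taken from the examples:

    # Hypothetical: hidden_dim is a symbolic dimension whose concrete value is
    # bound elsewhere; CreateVariable above resolves it via symbolic.EvalExpr.
    hidden_dim = symbolic.Symbol('hidden_dim')
    wp = py_utils.WeightParams(
        shape=[hidden_dim, 128],
        init=py_utils.WeightInit.Gaussian(),
        dtype=tf.float32)
    self.CreateVariable('proj_w', wp)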
Example #21
  def _CreateVariable(self, name, meta):
    """Immediately creates the variable described by `meta`.

    DO NOT OVERRIDE. For internal use only. Subclasses of BaseLayer should use
    self.CreateVariable() to create variables.

    Args:
      name: The variable name.
      meta: A CreateVariableMeta describing the variable to be created.
    """
    with tf.variable_scope(meta.var_scope):
      meta.kwargs.setdefault('default_seed', self.params.random_seed)
      value, var = py_utils.CreateVariable(name, meta.var_params, **meta.kwargs)
      self._private_vars[name] = var
      if meta.theta_fn is not None:
        value = meta.theta_fn(value)
      self._private_theta[name] = value
Example #22
        def _Acc(vg):
            """Updating accumulators."""

            v, g = vg
            scope_name = v.name
            if scope_name.endswith(':0'):
                scope_name = scope_name[:-2]
            with tf.variable_scope(scope_name):
                a = py_utils.CreateVariable(
                    'grad_accumulator',
                    py_utils.WeightParams(v.get_shape(),
                                          py_utils.WeightInit.Constant(0.0),
                                          self.params.dtype),
                    trainable=False)
                a = tf.assign_add(a, g)

            return py_utils.VarGrad(v, a)
Example #23
  def testXavier3D(self):
    with self.session(use_gpu=False, graph=tf.Graph()):
      tf.set_random_seed(1618)
      methods = [py_utils.WeightInit.Xavier]
      dtypes = [tf.float32, tf.float16, tf.complex64]
      shapes = [[1, 1, 2]]
      all_vars = []
      for i, (m, dt, sp) in enumerate(
          itertools.product(methods, dtypes, shapes)):
        pc = py_utils.WeightParams(sp, m(), dt)
        all_vars.append(py_utils.CreateVariable('var_%d' % i, pc)[0])

      v1_v_expted = [[[1.357139, -1.23832]]]

      tf.global_variables_initializer().run()
      v1_v = all_vars[0].eval()
      self.assertAllClose(v1_v_expted, v1_v.tolist())
Example #24
  def testOpportunisticReuse(self):
    pc = py_utils.WeightParams([3, 3])
    _, v1 = py_utils.CreateVariable('v1', pc)
    with self.assertRaises(Exception):
      _ = py_utils.CreateVariable('v1', pc)
    with py_utils.OpportunisticVariableReuseScope(True):
      _, v2 = py_utils.CreateVariable('v1', pc)
      _, x1 = py_utils.CreateVariable('x1', pc)
      with py_utils.OpportunisticVariableReuseScope(False):
        with self.assertRaises(Exception):
          _ = py_utils.CreateVariable('v1', pc)
      _, v3 = py_utils.CreateVariable('v1', pc)
    with self.assertRaises(Exception):
      _ = py_utils.CreateVariable('v1', pc)

    for v in [v2, v3]:
      self.assertTrue(v1 is v)
    self.assertTrue(v1 is not x1)
Example #25
    def _CreateVariableInternal(self, name, meta):
        """Immediately creates the variable described by `meta`.

    DO NOT OVERRIDE. For internal use only. Subclasses of BaseLayer should use
    self.CreateVariable() to create variables.

    Args:
      name: The variable name.
      meta: A CreateVariableMeta describing the variable to be created.
    """
        meta.kwargs.setdefault('default_seed', self.params.random_seed)
        var = py_utils.CreateVariable(name, meta.var_params, **meta.kwargs)
        self._private_vars[name] = var
        if FLAGS.no_identity_on_vars:
            value = var
        else:
            with tf.device(var.device):
                value = tf.identity(var)
        if meta.theta_fn is not None:
            value = meta.theta_fn(value)
        self._private_theta[name] = value
Example #26
  def _CreateVariableInternal(self, name: str,
                              meta: CreateVariableMeta) -> None:
    """Immediately creates the variable described by `meta`.

    DO NOT OVERRIDE. For internal use only. Subclasses of BaseLayer should use
    self.CreateVariable() to create variables.

    Args:
      name: The variable name.
      meta: A CreateVariableMeta describing the variable to be created.
    """
    meta.kwargs.setdefault('default_seed', self.params.random_seed)
    var = py_utils.CreateVariable(name, meta.var_params, **meta.kwargs)
    self._private_vars[name] = var
    if self.cluster.params.worker.gpus_per_replica > 0:
      # On GPU (which always trains a single step per session.run()), reference
      # a tensor in FProp to cache it on device and avoid extraneous sends from
      # reading variables from ps multiple times.
      with tf.device(var.device):
        value = tf.identity(var)
    else:
      # Pass the resource variable directly into the training loop.
      value = var

    # Due to b/174956514, we have to annotate the use of the variable once,
    # otherwise, the sharding annotation on the var will be ignored.
    # TODO(yonghui): Get rid of this once b/174956514 is fixed.
    if (meta.var_params.device_mesh is not None and
        var.shape.rank == len(meta.var_params.tensor_split_dims_mapping)):
      value = gshard_utils.MeshSplit(
          value,
          meta.var_params.device_mesh,
          meta.var_params.tensor_split_dims_mapping,
          use_sharding_op=True)

    if meta.theta_fn is not None:
      self._private_theta_fn[name] = meta.theta_fn

    self._private_theta[name] = value
Example #27
  def testXavier(self):
    with self.session(use_gpu=False, graph=tf.Graph()):
      tf.set_random_seed(1618)
      methods = [py_utils.WeightInit.Xavier]
      dtypes = [tf.float32, tf.float16, tf.complex64]
      shapes = [[2, 3]]
      all_vars = []
      for i, (m, dt, sp) in enumerate(
          itertools.product(methods, dtypes, shapes)):
        pc = py_utils.WeightParams(sp, m(), dt)
        all_vars.append(py_utils.CreateVariable('var_%d' % i, pc)[0])

      v1_v_expted = [[1.051236, -0.959198, 0.796091],
                     [-0.685691, 0.230933, -1.006293]]
      v3_v_expted = [
          [0.149996 - 0.064369j, 0.689145 + 0.017257j, -0.502070 - 0.367683j],
          [0.519782 + 0.470412j, 0.738902 - 0.054006j, 0.028603 + 0.471832j],
      ]

      tf.global_variables_initializer().run()
      v1_v = all_vars[0].eval()
      v3_v = all_vars[2].eval()
      self.assertAllClose(v1_v_expted, v1_v.tolist())
      self.assertAllClose(v3_v_expted, v3_v.tolist())