def model_fn():
        """Creates partitioned variables `x` and `y` and checks their devices.

        Uses `self`, `d` and `partitioner` from the enclosing scope.

        Returns:
          A tuple `(x_add, y_add)` holding the results of the two
          `assign_add` calls.
        """
        a = constant_op.constant([3.0, 5.0])
        # The device scope is ignored for variables but not for normal ops.
        with ops.device('/job:worker/task:0'):
          x = variable_scope.get_variable(
              'x',
              initializer=constant_op.constant([10.0, 20.0]),
              aggregation=variable_scope.VariableAggregation.SUM,
              partitioner=partitioner)
          x_add = x.assign_add(a, name='x_add')
        # Partition `part_id` of x is expected on '/job:ps/task:<part_id>', and
        # the matching element of x_add is expected on that same device.
        for part_id, var in enumerate(x):
          self.assertEqual(var.device, '/job:ps/task:%d' % part_id)
          self.assertEqual(var.device, x_add[part_id].device)

        # The colocate_vars_with can override the distribution's device.
        with d.colocate_vars_with(x_add[0]):
          y = variable_scope.get_variable(
              'y',
              initializer=constant_op.constant([20.0, 10.0]),
              aggregation=variable_scope.VariableAggregation.SUM,
              partitioner=partitioner)
        y_add = y.assign_add(
            [array_ops.identity(x_add[0]),
             array_ops.identity(x_add[1])])

        # Every partition of y (and its update) is expected on ps task 0, i.e.
        # on the device of x_add[0] that y was colocated with.
        for part_id, var in enumerate(y):
          self.assertEqual(var.device, '/job:ps/task:0')
          self.assertEqual(y_add[part_id].device, var.device)
          self.assertEqual(var.device, x_add[0].device)

        return x_add, y_add
  def testInitFromCheckpoint(self):
    """Checks that init_from_checkpoint overrides variable initial values.

    A checkpoint is written in one session; the variables are then rebuilt
    in a fresh graph, mapped onto the checkpoint by name, by scope prefix
    and by variable object, and compared with the saved values.
    """
    ckpt_dir = self.get_temp_dir()
    with self.test_session() as session:
      v1, v2, v3, v4 = _create_checkpoints(session, ckpt_dir)

    # Rebuild everything in a separate graph with its own session.
    with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
      with variable_scope.variable_scope("some_scope"):
        restored1 = variable_scope.get_variable("my1", [1, 10])
        with variable_scope.variable_scope("some_other_scope"):
          restored2 = variable_scope.get_variable("my2", [10, 10])
          with variable_scope.variable_scope("other_useful_scope"):
            restored4 = variable_scope.get_variable("var4", [9, 9])
      restored3 = variable_scope.get_variable("my3", [100, 100])

      # First call: one mapping by variable name, one by scope prefix.
      name_and_scope_map = {
          "var1": "some_scope/my1",
          "useful_scope/": "some_scope/some_other_scope/other_useful_scope/",
      }
      checkpoint_utils.init_from_checkpoint(ckpt_dir, name_and_scope_map)
      # Second call: one mapping by variable name, one by variable object.
      name_and_object_map = {
          "var2": "some_scope/some_other_scope/my2",
          "var3": restored3,
      }
      checkpoint_utils.init_from_checkpoint(ckpt_dir, name_and_object_map)

      session.run(variables.global_variables_initializer())
      pairs = ((restored1, v1), (restored2, v2), (restored3, v3),
               (restored4, v4))
      for restored, saved in pairs:
        self.assertAllEqual(restored.eval(session), saved)

      # Check that tensors are not explicitly in the graph.
      graph_def_text = str(session.graph.as_graph_def())
      self.assertLess(len(graph_def_text), 29000)
    def _DenseLayer(x, num_inputs, num_outputs, quantization_range, name):
      """Builds a dense layer that quantizes before and after the bias add.

      The matmul result is passed through `_Quantize`, the bias is added, and
      the sum is passed through `_Quantize` again.

      Args:
        x: input to the dense layer
        num_inputs: number of input columns of x
        num_outputs: number of output columns
        quantization_range: the min/max range for quantization
        name: name of the variable scope

      Returns:
        The output of the layer after the second quantization.
      """
      with variable_scope.variable_scope(name):
        weights = variable_scope.get_variable(
            'kernel',
            shape=[num_inputs, num_outputs],
            dtype=dtypes.float32,
            initializer=keras.initializers.glorot_uniform())
        offsets = variable_scope.get_variable(
            'bias',
            shape=[num_outputs],
            dtype=dtypes.float32,
            initializer=keras.initializers.zeros())
        projected = _Quantize(math_ops.matmul(x, weights), quantization_range)
        return _Quantize(nn.bias_add(projected, offsets), quantization_range)
Example #4
0
  def register_option2quants(self, damping):
    """Registers the (Pmat, Kmat, mu) variables for a damping value.

    Always calls `register_cov_dt1()`. The first time a given `damping` is
    seen, three non-trainable variables are created under `self._var_scope`
    and stored as a tuple in `self._option2quants_by_damping[damping]`;
    later calls with the same `damping` create nothing.

    Args:
      damping: the damping value; used as the dictionary key and, via
        `scalar_or_tensor_to_string`, as a suffix of the variable names.
    """
    self.register_cov_dt1()

    if damping in self._option2quants_by_damping:
      return

    # It's questionable as to whether we should initialize with stuff like
    # this at all.  Ideally these values should never be used until they are
    # updated at least once.
    suffix = scalar_or_tensor_to_string(damping)
    with variable_scope.variable_scope(self._var_scope):
      matrix_kwargs = dict(
          initializer=inverse_initializer,
          shape=self._cov_shape,
          trainable=False,
          dtype=self._dtype)
      # NOTE: the first matrix is created under the "Lmat_damp" name prefix.
      p_mat = variable_scope.get_variable(
          "Lmat_damp{}".format(suffix), **matrix_kwargs)
      k_mat = variable_scope.get_variable(
          "Kmat_damp{}".format(suffix), **matrix_kwargs)
      mu = variable_scope.get_variable(
          "mu_damp{}".format(suffix),
          initializer=init_ops.ones_initializer,
          shape=self._vec_shape,
          trainable=False,
          dtype=self._dtype)

    self._option2quants_by_damping[damping] = (p_mat, k_mat, mu)
Example #5
0
def _auc_hist_accumulate(hist_true, hist_false, nbins, collections):
  """Creates running-total variables for two histograms plus an update op.

  Args:
    hist_true: histogram added to the 'hist_true_acc' variable on update.
    hist_false: histogram added to the 'hist_false_acc' variable on update.
    nbins: number of bins; the accumulators are initialized with `[nbins]`.
    collections: collections passed to `get_variable` for both accumulators.

  Returns:
    A tuple `(hist_true_acc, hist_false_acc, update_op)`, where `update_op`
    groups the `assign_add` of each histogram into its accumulator.
  """

  def _running_total(var_name, hist):
    # Non-trainable accumulator using the dtype of the histogram it tracks.
    return variable_scope.get_variable(
        var_name,
        initializer=array_ops.zeros_initializer([nbins], dtype=hist.dtype),
        collections=collections,
        trainable=False)

  with variable_scope.variable_op_scope(
      [hist_true, hist_false], None, 'hist_accumulate'):
    # Running total for records labeled True, then for records labeled False.
    true_total = _running_total('hist_true_acc', hist_true)
    false_total = _running_total('hist_false_acc', hist_false)

    accumulate = control_flow_ops.group(
        true_total.assign_add(hist_true),
        false_total.assign_add(hist_false),
        name='update_op')

    return true_total, false_total, accumulate
  def _between_graph_with_monitored_session(self, strategy):
    """Runs one training step in a MonitoredSession and checks shared values.

    Places resource variable "xx" (initial value 10.0) on ps task 0 and "yy"
    (initial value 20.0) on ps task 1, runs an op that adds 2.0 to the first
    and subtracts 2.0 from the second, waits for the other workers, and then
    expects to read 16.0 and 14.0. On success `self._result_correct` is
    incremented under `self._lock`.

    Args:
      strategy: not referenced in the body of this method.
    """
    context = distribute_coordinator_context.get_current_worker_context()
    self.assertTrue(context is not None)
    with ops.device("/job:ps/task:0"):
      # TODO(yuefengz): investigate why not using resource variable will make
      # the test flaky.
      x = variable_scope.get_variable("xx", initializer=10.0, use_resource=True)
    with ops.device("/job:ps/task:1"):
      y = variable_scope.get_variable("yy", initializer=20.0, use_resource=True)

    x_add = x.assign_add(2.0)
    y_sub = y.assign_sub(2.0)
    train_op = control_flow_ops.group([x_add, y_sub])

    # The monitored session will run init or ready ops.
    with monitored_session.MonitoredSession() as sess:
      sess.run(train_op)

      # Synchronize workers after one step to make sure they all have finished
      # training.
      if context.has_barrier:
        context.wait_for_other_workers()
      else:
        self._barrier.wait()

      x_val, y_val = sess.run([x, y])

    # 16.0 and 14.0 correspond to three applications of train_op in total --
    # presumably three workers each running it once; TODO confirm with caller.
    self.assertEqual(x_val, 16.0)
    self.assertEqual(y_val, 14.0)
    if x_val == 16.0 and y_val == 14.0:
      with self._lock:
        self._result_correct += 1
Example #7
0
  def __call__(self, x, states_prev, scope=None):
    """Long short-term memory cell (LSTM).

    Args:
      x: input tensor; its static shape must have rank 2 and a known second
        dimension.
      states_prev: a tuple of length 2, unpacked as `(cs_prev, h_prev)`.
      scope: optional variable scope; defaults to `self._names["scope"]`.

    Returns:
      A tuple `(h, (cs, h))` built from the outputs of `_lstm_block_cell`.

    Raises:
      ValueError: if the second dimension of `x` is not set, or if
        `states_prev` does not have length 2.
    """
    with vs.variable_scope(scope or self._names["scope"]):
      x_shape = x.get_shape().with_rank(2)
      if not x_shape[1]:
        raise ValueError("Expecting x_shape[1] to be set: %s" % str(x_shape))
      if len(states_prev) != 2:
        raise ValueError("Expecting states_prev to be a tuple with length 2.")
      input_size = x_shape[1]
      # One weight matrix for the concatenated [input, hidden] vector; its
      # second dimension is four times the number of units.
      w = vs.get_variable(self._names["W"], [input_size + self._num_units,
                                             self._num_units * 4])
      b = vs.get_variable(
          self._names["b"], [w.get_shape().with_rank(2)[1]],
          initializer=init_ops.constant_initializer(0.0))
      if self._use_peephole:
        wci = vs.get_variable(self._names["wci"], [self._num_units])
        wco = vs.get_variable(self._names["wco"], [self._num_units])
        wcf = vs.get_variable(self._names["wcf"], [self._num_units])
      else:
        # Without peepholes the op still receives the three weight arguments,
        # so a single zero tensor is passed for all of them.
        wci = wco = wcf = array_ops.zeros([self._num_units])
      (cs_prev, h_prev) = states_prev
      (_, cs, _, _, _, _, h) = _lstm_block_cell(
          x,
          cs_prev,
          h_prev,
          w,
          b,
          wci=wci,
          wco=wco,
          wcf=wcf,
          forget_bias=self._forget_bias,
          use_peephole=self._use_peephole)

      return (h, (cs, h))
 def testInvalidGlobalStep(self):
   """Checks that optimize_loss rejects three kinds of invalid global_step."""
   with ops.Graph().as_default() as g, self.test_session(graph=g):
     x = array_ops.placeholder(dtypes.float32, [])
     var = variable_scope.get_variable(
         "test", [], initializer=init_ops.constant_initializer(10))
     loss = math_ops.abs(var * x)
     # A constant tensor instead of a variable: AttributeError expected.
     with self.assertRaises(AttributeError):
       optimizers_lib.optimize_loss(
           loss,
           global_step=constant_op.constant(
               43, dtype=dtypes.int64),
           learning_rate=0.1,
           optimizer="SGD")
     # A float64 variable: TypeError expected.
     with self.assertRaises(TypeError):
       optimizers_lib.optimize_loss(
           loss,
           global_step=variable_scope.get_variable(
               "global_step", [],
               trainable=False,
               dtype=dtypes.float64,
               initializer=init_ops.constant_initializer(
                   0.0, dtype=dtypes.float64)),
           learning_rate=0.1,
           optimizer="SGD")
     # An int64 variable of shape [1] instead of a scalar: ValueError expected.
     with self.assertRaises(ValueError):
       optimizers_lib.optimize_loss(
           loss,
           global_step=variable_scope.get_variable(
               "global_step", [1],
               trainable=False,
               dtype=dtypes.int64,
               initializer=init_ops.constant_initializer(
                   [0], dtype=dtypes.int64)),
           learning_rate=0.1,
           optimizer="SGD")
Example #9
0
 def build(self, input_shape):
   """Validates `input_shape` and creates the layer's kernel and bias.

   Args:
     input_shape: shape of the layer input; converted to a `TensorShape`.

   Raises:
     ValueError: if the rank is unknown, the rank is below 2, or the last
       dimension is undefined.
   """
   shape = tensor_shape.TensorShape(input_shape)
   if shape.ndims is None:
     raise ValueError('Inputs to `Dense` should have known rank.')
   if len(shape) < 2:
     raise ValueError('Inputs to `Dense` should have rank >= 2.')
   last_dim = shape[-1].value
   if last_dim is None:
     raise ValueError('The last dimension of the inputs to `Dense` '
                      'should be defined. Found `None`.')
   # Note that we set `trainable=True` because this is a trainable
   # weight of the layer. If the layer is not trainable
   # (self.trainable = False), the variable will not be added to
   # tf.trainable_variables(), and self.trainable_weights will be empty.
   self.kernel = vs.get_variable(
       'kernel',
       shape=[last_dim, self.units],
       initializer=self.kernel_initializer,
       regularizer=self.kernel_regularizer,
       dtype=self.dtype,
       trainable=True)
   if not self.use_bias:
     self.bias = None
     return
   self.bias = vs.get_variable(
       'bias',
       shape=[self.units,],
       initializer=self.bias_initializer,
       regularizer=self.bias_regularizer,
       dtype=self.dtype,
       trainable=True)
Example #10
0
 def testAllowsReuseWithoutPartitioner(self):
   """A variable made with a partitioner can be reused without one."""
   # Create the variable in a scope that carries a partitioner.
   with variable_scope.variable_scope(
       "scope0", partitioner=axis0_into2_partitioner):
     created = variable_scope.get_variable("name0", shape=(3, 1, 1))
   # Fetch it again from a reuse scope that sets no partitioner.
   with variable_scope.variable_scope("scope0", reuse=True):
     fetched = variable_scope.get_variable("name0")
   self.assertEqual(created, fetched)
Example #11
0
  def _testPartitionConcatenatesAlongCorrectAxis(self, use_resource):
    """Checks whole and per-partition shapes for two partitioning axes.

    Args:
      use_resource: forwarded to the enclosing "root" variable scope.
    """

    def _split_axis_0(**unused_kwargs):
      return (2, 1, 1)

    def _split_axis_1(**unused_kwargs):
      return (1, 2, 1)

    with variable_scope.variable_scope("root", use_resource=use_resource):
      along_axis_0 = variable_scope.get_variable(
          "n0", shape=(2, 2, 2), partitioner=_split_axis_0)
      along_axis_1 = variable_scope.get_variable(
          "n1", shape=(2, 2, 2), partitioner=_split_axis_1)

    # The partitioned variable as a whole reports the full shape.
    for whole in (along_axis_0, along_axis_1):
      self.assertEqual(whole.get_shape(), (2, 2, 2))

    # The first two partitions are halved along the partitioned axis.
    axis_0_parts = list(along_axis_0)
    first, second = axis_0_parts[0], axis_0_parts[1]
    self.assertEqual(first.get_shape(), (1, 2, 2))
    self.assertEqual(second.get_shape(), (1, 2, 2))

    axis_1_parts = list(along_axis_1)
    first, second = axis_1_parts[0], axis_1_parts[1]
    self.assertEqual(first.get_shape(), (2, 1, 2))
    self.assertEqual(second.get_shape(), (2, 1, 2))
Example #12
0
  def testInitFromNonInitializer(self):
    """Compares default-initialized and zero-initialized partitioned variables.

    For each dtype, one partitioned variable is created without an
    initializer and one with `zeros_initializer`; their evaluated partitions
    are then asserted equal.
    """
    with self.test_session() as sess:
      # Dtypes exercised against the zeros initializer.
      dtypes_under_test = [
          dtypes.int8, dtypes.uint8, dtypes.int16, dtypes.uint16, dtypes.int32,
          dtypes.int64, dtypes.bool
      ]

      # The index is embedded in the variable names to keep them distinct
      # across dtypes.
      for index, dtype in enumerate(dtypes_under_test):
        default_init_var = variable_scope.get_variable(
            name="x%d" % index,
            shape=(3, 4),
            dtype=dtype,
            partitioner=axis0_into2_partitioner)
        zero_init_var = variable_scope.get_variable(
            name="y%d" % index,
            shape=(6, 4),
            dtype=dtype,
            partitioner=axis0_into2_partitioner,
            initializer=init_ops.zeros_initializer(dtype=dtype))

        variables_lib.global_variables_initializer().run()
        # Partitioned variables are evaluated as lists of their partitions.
        default_values = sess.run(list(default_init_var))
        zero_values = sess.run(list(zero_init_var))

        self.assertAllEqual(default_values, zero_values)
Example #13
0
 def testReturnsExistingConcatenatedValueIfReuse(self):
   """After reuse_variables(), get_variable returns the same variable."""
   with variable_scope.variable_scope(
       "scope0", partitioner=axis0_into2_partitioner):
     first_lookup = variable_scope.get_variable("name0", shape=(3, 1, 1))
     # Switch the current scope to reuse mode before asking again.
     current_scope = variable_scope.get_variable_scope()
     current_scope.reuse_variables()
     second_lookup = variable_scope.get_variable("name0", shape=(3, 1, 1))
     self.assertEqual(first_lookup, second_lookup)
Example #14
0
  def testVarOpScopeReuseParam(self):
    """Checks variable and name-scope names when re-entering a scope with reuse.

    The assertions expect variable names to stay under "outer/" on re-entry,
    while the name scopes opened on re-entry are expected under "outer_1/".
    """
    with self.test_session():
      # First pass: create the variables under "outer".
      with variable_scope.variable_scope("outer") as outer:
        with variable_scope.variable_scope("tower", "default", []):
          self.assertEqual(
              variable_scope.get_variable("w", []).name, "outer/tower/w:0")
          with ops.name_scope("scope2") as sc2:
            self.assertEqual(sc2, "outer/tower/scope2/")
        # With a `None` scope name the default name "default" is used.
        with variable_scope.variable_scope(None, "default", []):
          self.assertEqual(
              variable_scope.get_variable("w", []).name, "outer/default/w:0")
          with ops.name_scope("scope2") as sc2:
            self.assertEqual(sc2, "outer/default/scope2/")

      # Second pass: re-enter the captured scope object and fetch the same
      # variables again.
      with variable_scope.variable_scope(outer) as outer:
        # Reuse requested through the `reuse=True` argument.
        with variable_scope.variable_scope("tower", "default", reuse=True):
          self.assertEqual(
              variable_scope.get_variable("w", []).name, "outer/tower/w:0")
          with ops.name_scope("scope2") as sc2:
            self.assertEqual(sc2, "outer_1/tower/scope2/")
        # Reuse requested on the outer scope itself.
        outer.reuse_variables()
        with variable_scope.variable_scope(None, "default", []):
          self.assertEqual(
              variable_scope.get_variable("w", []).name, "outer/default/w:0")
          with ops.name_scope("scope2") as sc2:
            self.assertEqual(sc2, "outer_1/default/scope2/")
Example #15
0
 def testGetGlobalVariables(self):
   """scope.global_variables() is expected to list only "foo/b:0"."""
   with self.test_session():
     # One variable outside the scope, one inside it.
     variable_scope.get_variable("a", [])
     with variable_scope.variable_scope("foo") as scope:
       variable_scope.get_variable("b", [])
       names_in_scope = [v.name for v in scope.global_variables()]
       self.assertEqual(names_in_scope, ["foo/b:0"])
Example #16
0
  def _GenerateTestInputs(self):
    """Generates seeded random inputs plus sharded copies of weights/biases.

    Returns:
      A tuple `(weights, biases, hidden_acts, sharded_weights_v,
      sharded_biases_v)`; the last two are the evaluated partitions of the
      variables initialized from `weights` and `biases`.
    """
    np.random.seed(0)
    weights = np.random.randn(self._num_classes, self._dim).astype(np.float32)
    biases = np.random.randn(self._num_classes).astype(np.float32)
    hidden_acts = np.random.randn(self._batch_size,
                                  self._dim).astype(np.float32)

    def _sharded(var_name, values):
      # Variable initialized from `values` and split into self._num_shards.
      return variable_scope.get_variable(
          var_name,
          partitioner=partitioned_variables.fixed_size_partitioner(
              self._num_shards),
          initializer=constant_op.constant(values))

    with ops.Graph().as_default() as g:
      weight_var = _sharded("w", weights)
      bias_var = _sharded("b", biases)
      with self.test_session(graph=g) as sess:
        variables.global_variables_initializer().run()

        weight_shards, bias_shards = sess.run(
            [list(weight_var), list(bias_var)])

    return weights, biases, hidden_acts, weight_shards, bias_shards
Example #17
0
  def testVarOpScope(self):
    """Checks variable and name-scope naming for named and default scopes.

    The assertions expect variable names to omit the enclosing `name_scope`,
    while nested name scopes are expected to include it.
    """
    with self.test_session():
      with ops.name_scope("scope1"):
        with variable_scope.variable_scope("tower", "default", []):
          self.assertEqual(
              variable_scope.get_variable("w", []).name, "tower/w:0")
          with ops.name_scope("scope2") as sc2:
            self.assertEqual(sc2, "scope1/tower/scope2/")
        # Entering "tower" a second time: requesting "w" again is expected to
        # raise, and the name scope is expected to become "tower_1".
        with variable_scope.variable_scope("tower", "default", []):
          with self.assertRaises(ValueError):
            variable_scope.get_variable("w", [])
          with ops.name_scope("scope2") as sc2:
            self.assertEqual(sc2, "scope1/tower_1/scope2/")

      with ops.name_scope("scope2"):
        # With a `None` scope name the default name "default" is used.
        with variable_scope.variable_scope(None, "default", []):
          self.assertEqual(
              variable_scope.get_variable("w", []).name, "default/w:0")
          with ops.name_scope("scope2") as sc2:
            self.assertEqual(sc2, "scope2/default/scope2/")
        # A second default-named scope is expected to become "default_1".
        with variable_scope.variable_scope(None, "default", []):
          self.assertEqual(
              variable_scope.get_variable("w", []).name, "default_1/w:0")
          with ops.name_scope("scope2") as sc2:
            self.assertEqual(sc2, "scope2/default_1/scope2/")
  def testTraining(self):
    """Runs one gradient descent step on a linear model and checks w and b."""
    initial_w = np.array([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=np.float32)
    initial_b = np.array([2, 3], dtype=np.float32)
    expected_w = np.array(
        [[0.3, 1.3], [2.7, 3.7], [4.5, 5.5], [6.1, 7.1]], dtype=np.float32)
    expected_b = np.array([1.9, 2.9], dtype=np.float32)

    with self.test_session() as session:
      with self.test_scope():
        with variable_scope.variable_scope("ascope", use_resource=True):
          w = variable_scope.get_variable(
              "w",
              shape=[4, 2],
              dtype=dtypes.float32,
              initializer=init_ops.constant_initializer(initial_w))
          b = variable_scope.get_variable(
              "b",
              shape=[2],
              dtype=dtypes.float32,
              initializer=init_ops.constant_initializer(initial_b))

          # loss = sum(x @ w + b), minimized with learning rate 0.1.
          x = array_ops.placeholder(dtypes.float32, shape=[1, 4])
          y = math_ops.matmul(x, w) + b
          loss = math_ops.reduce_sum(y)
          train = GradientDescentOptimizer(0.1).minimize(loss)

      session.run(variables.global_variables_initializer())
      feed = {x: np.array([[7, 3, 5, 9]], dtype=np.float32)}
      session.run(train, feed)
      vw, vb = session.run([w, b])
      self.assertAllClose(expected_w, vw, rtol=1e-4)
      self.assertAllClose(expected_b, vb, rtol=1e-4)
 def testRegisterSingleParamRegisteredInTuple(self):
   """Registering x alone when (x, y) is already a key leaves blocks as-is.

   After `register_block(x, 'foo')`, `get_blocks()` is asserted to still
   yield only the pre-existing '1'.
   """
   x = variable_scope.get_variable('x', initializer=array_ops.constant(1))
   y = variable_scope.get_variable('y', initializer=array_ops.constant(1))
   collection = layer_collection.LayerCollection()
   # Pre-populate the collection with a block keyed by the tuple (x, y).
   collection.fisher_blocks = {(x, y): '1'}
   collection.register_block(x, 'foo')
   self.assertEqual({'1'}, set(collection.get_blocks()))
Example #20
0
  def __call__(self, x, h_prev, scope=None):
    """GRU cell.

    Args:
      x: input tensor; its static shape must have rank 2 and a set second
        dimension.
      h_prev: previous state tensor; its static shape must have rank 2 and a
        second dimension equal to `self._cell_size`.
      scope: optional variable scope; defaults to the class name.

    Returns:
      A tuple `(new_h, new_h)`, where `new_h` is the last output of
      `gen_gru_ops.gru_block_cell`.

    Raises:
      ValueError: if the input size is not set, if the cell size read from
        `h_prev` is None, or if it differs from `self._cell_size`.
    """
    with vs.variable_scope(scope or type(self).__name__):
      input_size = x.get_shape().with_rank(2)[1]

      # Check if the input size exist.
      if input_size is None:
        raise ValueError("Expecting input_size to be set.")

      cell_size = h_prev.get_shape().with_rank(2)[1]

      # Reject a missing cell size before comparing it, so that a None value
      # reports the dedicated message rather than the mismatch message.
      if cell_size is None:
        raise ValueError("cell_size from `h_prev` should not be None.")

      # Check cell_size == state_size from h_prev.
      if cell_size != self._cell_size:
        raise ValueError("Shape of h_prev[1] incorrect: cell_size %i vs %s" %
                         (self._cell_size, cell_size))

      # Weights and biases for the gates (w_ru, b_ru) and the candidate
      # (w_c, b_c), as passed to gru_block_cell below.
      w_ru = vs.get_variable("w_ru", [input_size + self._cell_size,
                                      self._cell_size * 2])
      b_ru = vs.get_variable(
          "b_ru", [self._cell_size * 2],
          initializer=init_ops.constant_initializer(1.0))
      w_c = vs.get_variable("w_c",
                            [input_size + self._cell_size, self._cell_size])
      b_c = vs.get_variable(
          "b_c", [self._cell_size],
          initializer=init_ops.constant_initializer(0.0))

      _gru_block_cell = gen_gru_ops.gru_block_cell  # pylint: disable=invalid-name
      _, _, _, new_h = _gru_block_cell(
          x=x, h_prev=h_prev, w_ru=w_ru, w_c=w_c, b_ru=b_ru, b_c=b_c)

      return new_h, new_h
 def _annotated_graph(self):
   """Builds a 3-layer conv graph whose ops carry `_recompute_hint` attrs.

   Returns:
     A tuple `(graph, init_op, train_op)`.
   """
   graph = ops.Graph()
   with graph.as_default():
     random_seed.set_random_seed(2)
     current_activation = variable_scope.get_variable(
         name='start', shape=[1, 2, 2, 5])
     conv_filter = variable_scope.get_variable(
         name='filter', shape=[5, 5, 5, 5])
     # Every layer reuses the same filter and applies: conv2d -> multiply by
     # 2 -> add 5 -> relu. The hint is set on the op producing the multiply
     # (i=1), the add (i=0) and the relu (i=1) results.
     for layer_number in range(3):
       with variable_scope.variable_scope('layer_{}'.format(layer_number)):
         after_conv = nn.conv2d(current_activation, conv_filter, [1, 1, 1, 1],
                                'SAME')
         current_activation = 2. * after_conv
         current_activation.op._set_attr(
             '_recompute_hint',
             # The value of the attribute does not matter; just that the key
             # exists in the op's attributes.
             attr_value_pb2.AttrValue(i=1))
         current_activation += 5.
         current_activation.op._set_attr(
             '_recompute_hint', attr_value_pb2.AttrValue(i=0))
         current_activation = nn.relu(current_activation)
         current_activation.op._set_attr(
             '_recompute_hint', attr_value_pb2.AttrValue(i=1))
     loss = math_ops.reduce_mean(current_activation)
     optimizer = train.AdamOptimizer(0.001)
     train_op = optimizer.minimize(loss)
     init_op = variables.global_variables_initializer()
   return graph, init_op, train_op
Example #22
0
  def testPartitionConcatenatesAlongCorrectAxis(self):
    """Checks whole-variable shapes and the shapes of the part tensors."""

    def _split_axis_0(**unused_kwargs):
      return (2, 1, 1)

    def _split_axis_1(**unused_kwargs):
      return (1, 2, 1)

    with variable_scope.variable_scope("root"):
      along_axis_0 = variable_scope.get_variable(
          "n0", shape=(2, 2, 2), partitioner=_split_axis_0)
      along_axis_1 = variable_scope.get_variable(
          "n1", shape=(2, 2, 2), partitioner=_split_axis_1)

    # The partitioned variable as a whole reports the full shape.
    for whole in (along_axis_0, along_axis_1):
      self.assertEqual(whole.get_shape(), (2, 2, 2))

    # Look the parts up by tensor name, one variable at a time.
    graph = ops.get_default_graph()
    axis_0_first = graph.get_tensor_by_name("root/n0/part_0:0")
    axis_0_second = graph.get_tensor_by_name("root/n0/part_1:0")
    for part in (axis_0_first, axis_0_second):
      self.assertEqual(part.get_shape(), (1, 2, 2))

    axis_1_first = graph.get_tensor_by_name("root/n1/part_0:0")
    axis_1_second = graph.get_tensor_by_name("root/n1/part_1:0")
    for part in (axis_1_first, axis_1_second):
      self.assertEqual(part.get_shape(), (2, 1, 2))
  def testErrorConditions(self):
    """Checks the error types raised by the warm-start utilities."""
    # Constructing the settings with `None` is expected to raise ValueError.
    self.assertRaises(ValueError, ws_util._WarmStartSettings, None)
    x = variable_scope.get_variable(
        "x",
        shape=[4, 1],
        initializer=ones(),
        partitioner=lambda shape, dtype: [2, 1])

    # List of PartitionedVariable is invalid type when warmstarting with vocab.
    self.assertRaises(TypeError, ws_util._warmstart_var_with_vocab, [x], "/tmp",
                      5, "/tmp", "/tmp")
    # Keys of type other than FeatureColumn.
    self.assertRaises(TypeError, ws_util._warmstart,
                      {"StringType": x}, ws_util._WarmStartSettings("/tmp"))

    # Unused variable names raises ValueError.
    with ops.Graph().as_default():
      with self.test_session() as sess:
        # NOTE: rebinds `x` to a variable created in the new graph.
        x = variable_scope.get_variable(
            "x",
            shape=[4, 1],
            initializer=ones(),
            partitioner=lambda shape, dtype: [2, 1])
        self._write_checkpoint(sess)

    # Both settings below refer to a variable named "y", while only "x" was
    # created above.
    self.assertRaises(ValueError, ws_util._warmstart,
                      ws_util._WarmStartSettings(
                          self.get_temp_dir(),
                          var_name_to_vocab_info={
                              "y": ws_util._VocabInfo("", 1, 0, "")
                          }))
    self.assertRaises(ValueError, ws_util._warmstart,
                      ws_util._WarmStartSettings(
                          self.get_temp_dir(),
                          var_name_to_prev_var_name={"y": "y2"}))
  def build(self, input_shape):
    """Validates `input_shape` and creates the kernel and bias variables.

    Args:
      input_shape: shape of the layer input; must have `self.rank + 2`
        entries, and its channel entry must not be `None`.

    Raises:
      ValueError: if `input_shape` has the wrong rank or the channel
        dimension is `None`.
    """
    expected_rank = self.rank + 2
    if len(input_shape) != expected_rank:
      # Build one message string; passing the shape as a second positional
      # argument would make the exception carry a tuple of args instead.
      raise ValueError('Inputs should have rank ' + str(expected_rank) +
                       '. Received input shape: ' + str(input_shape))
    # The channel entry is index 1 for 'channels_first', otherwise the last.
    if self.data_format == 'channels_first':
      channel_axis = 1
    else:
      channel_axis = -1
    if input_shape[channel_axis] is None:
      raise ValueError('The channel dimension of the inputs '
                       'should be defined. Found `None`.')
    input_dim = input_shape[channel_axis]
    kernel_shape = self.kernel_size + (input_dim, self.filters)

    self.kernel = vs.get_variable('kernel',
                                  shape=kernel_shape,
                                  initializer=self.kernel_initializer,
                                  regularizer=self.kernel_regularizer,
                                  trainable=True,
                                  dtype=self.dtype)
    if self.use_bias:
      self.bias = vs.get_variable('bias',
                                  shape=(self.filters,),
                                  initializer=self.bias_initializer,
                                  regularizer=self.bias_regularizer,
                                  trainable=True,
                                  dtype=self.dtype)
    else:
      self.bias = None
  def testWarmStartMoreSettingsNoPartitioning(self):
    """Warm-starts a linear model using a regex, a rename and a vocab remap.

    Three weight variables are saved in a first graph. A linear model is then
    built without a partitioner in a second graph and warm-started with
    settings that select only the sc_keys and sc_vocab variables, map the
    sc_keys variable to the checkpoint name "some_other_name", and attach
    vocabulary info to the sc_vocab variable.
    """
    # Create old and new vocabs for sparse column "sc_vocab".
    prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
                                        "old_vocab")
    new_vocab_path = self._write_vocab(
        ["orange", "guava", "banana", "apple", "raspberry",
         "blueberry"], "new_vocab")
    # Create feature columns.
    sc_hash = fc.categorical_column_with_hash_bucket(
        "sc_hash", hash_bucket_size=15)
    sc_keys = fc.categorical_column_with_vocabulary_list(
        "sc_keys", vocabulary_list=["a", "b", "c", "e"])
    sc_vocab = fc.categorical_column_with_vocabulary_file(
        "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
    all_linear_cols = [sc_hash, sc_keys, sc_vocab]

    # Save checkpoint from which to warm-start.
    with ops.Graph().as_default() as g:
      with self.test_session(graph=g) as sess:
        variable_scope.get_variable(
            "linear_model/sc_hash/weights", shape=[15, 1], initializer=norms())
        # Saved under a different name than the new model will use; mapped
        # back through var_name_to_prev_var_name below.
        sc_keys_weights = variable_scope.get_variable(
            "some_other_name", shape=[4, 1], initializer=rand())
        # One weight per entry of the old vocabulary, in old-vocab order.
        variable_scope.get_variable(
            "linear_model/sc_vocab/weights",
            initializer=[[0.5], [1.], [2.], [3.]])
        self._write_checkpoint(sess)
        prev_keys_val = sess.run(sc_keys_weights)

    # New graph, new session with warmstarting.
    with ops.Graph().as_default() as g:
      with self.test_session(graph=g) as sess:
        cols_to_vars = self._create_linear_model(all_linear_cols,
                                                 partitioner=None)
        vocab_info = ws_util._VocabInfo(
            new_vocab=sc_vocab.vocabulary_file,
            new_vocab_size=sc_vocab.vocabulary_size,
            num_oov_buckets=sc_vocab.num_oov_buckets,
            old_vocab=prev_vocab_path
        )
        ws_settings = ws_util._WarmStartSettings(
            self.get_temp_dir(),
            vars_to_warmstart=".*(sc_keys|sc_vocab).*",
            var_name_to_vocab_info={
                ws_util._infer_var_name(cols_to_vars[sc_vocab]): vocab_info
            },
            var_name_to_prev_var_name={
                ws_util._infer_var_name(cols_to_vars[sc_keys]):
                    "some_other_name"
            })
        ws_util._warmstart(ws_settings)
        sess.run(variables.global_variables_initializer())
        # Verify weights were correctly warmstarted.  Var corresponding to
        # sc_hash should not be warm-started.  Var corresponding to sc_vocab
        # should be correctly warmstarted after vocab remapping.
        # The expected sc_vocab weights follow the new vocabulary order, with
        # zeros for the two entries absent from the old vocabulary.
        self._assert_cols_to_vars(cols_to_vars, {
            sc_keys: [prev_keys_val],
            sc_hash: [np.zeros([15, 1])],
            sc_vocab: [np.array([[3.], [2.], [1.], [0.5], [0.], [0.]])]
        }, sess)
 def testRestoreOnAssign(self):
   """Checks restoring a variable that is attached only after restore()."""
   checkpoint_directory = self.get_temp_dir()
   checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
   # Save graph: two variables attached to one object, set to 4 and 8.
   save_graph = ops.Graph()
   with save_graph.as_default(), self.test_session(save_graph):
     first = checkpointable.Checkpointable()
     first.var1 = variable_scope.get_variable(
         name="outside_var", initializer=0.)
     first.var2 = variable_scope.get_variable(
         name="blah", initializer=0.)
     self.evaluate(first.var1.assign(4.))
     self.evaluate(first.var2.assign(8.))
     save_path = checkpointable_utils.CheckpointableSaver(first).save(
         checkpoint_prefix)
   # Restore graph: only var2 is attached when restore() is called.
   restore_graph = ops.Graph()
   with restore_graph.as_default(), self.test_session(restore_graph):
     second = checkpointable.Checkpointable()
     second.var2 = variable_scope.get_variable(
         name="blah", initializer=0.)
     status = checkpointable_utils.CheckpointableSaver(
         second).restore(save_path)
     # Created after restore() and not yet attached to `second`.
     recreated_var1 = variable_scope.get_variable(
         name="outside_var", initializer=0.)
     status.run_restore_ops()
     self.assertEqual(8., self.evaluate(second.var2))
     # While unattached, the variable is expected to keep the value assigned
     # here.
     self.evaluate(recreated_var1.assign(-2.))
     self.assertEqual(-2., self.evaluate(recreated_var1))
     # After attaching it and running the restore ops again, the saved value
     # (4.) is expected.
     second.var1 = recreated_var1
     status.run_restore_ops()
     self.assertEqual(4., self.evaluate(recreated_var1))
Example #27
0
def batch_normalize(tensor_in, epsilon=1e-5, convnet=False, decay=0.9, scale_after_normalization=True):
    """Batch-normalizes `tensor_in` using batch or moving-average statistics.

    Creates `gamma` and `beta` variables (sized by the last dimension of
    `tensor_in`) under the "batch_norm" variable scope and tracks an
    exponential moving average of the batch mean and variance. A flag read
    from the "IS_TRAINING" graph collection selects between the current batch
    moments (which also triggers the moving-average update) and the averaged
    moments.

    Args:
      tensor_in: input Tensor, 4D shape: [batch, in_height, in_width, in_depth].
      epsilon: A float number to avoid being divided by 0.
      convnet: Whether this is for convolutional net use. If True, moments are
        computed across axes [0, 1, 2]; otherwise only across axis [0].
      decay: decay rate for the exponential moving average.
      scale_after_normalization: Whether to scale after normalization.

    Returns:
      The result of `nn.batch_norm_with_global_normalization`.
    """
    depth = tensor_in.get_shape().as_list()[-1]
    moment_axes = [0, 1, 2] if convnet else [0]

    with vs.variable_scope("batch_norm"):
        gamma = vs.get_variable("gamma", [depth], initializer=init_ops.random_normal_initializer(1.0, 0.02))
        beta = vs.get_variable("beta", [depth], initializer=init_ops.constant_initializer(0.0))
        averager = moving_averages.ExponentialMovingAverage(decay=decay)
        batch_mean, batch_var = nn.moments(tensor_in, moment_axes)
        apply_averages = averager.apply([batch_mean, batch_var])
        running_mean = averager.average(batch_mean)
        running_var = averager.average(batch_var)

        def _batch_statistics():
            """Returns the batch moments, forcing the moving-average update first."""
            with ops.control_dependencies([apply_averages]):
                return array_ops_.identity(batch_mean), array_ops_.identity(batch_var)

        def _averaged_statistics():
            """Returns the moving-average moments without updating them."""
            return running_mean, running_var

        # NOTE(review): squeezing the collection presumably yields a scalar
        # flag, i.e. exactly one entry is expected in "IS_TRAINING" - confirm.
        training_flag = array_ops_.squeeze(ops.get_collection("IS_TRAINING"))
        mean, variance = control_flow_ops.cond(training_flag, _batch_statistics, _averaged_statistics)
        return nn.batch_norm_with_global_normalization(
            tensor_in, mean, variance, beta, gamma, epsilon, scale_after_normalization=scale_after_normalization
        )
  def testOptimizerInit(self):
    """Smoke test: a `KfacOptimizer` can be built for a registered layer.

    Builds a single fully connected layer, registers it and its predictive
    distribution with a `LayerCollection`, then constructs the optimizer.
    Nothing is asserted; the test passes if construction does not raise.
    """
    with ops.Graph().as_default():
      registry = lc.LayerCollection()

      features = array_ops.ones((2, 1)) * 2
      initial_weights = np.ones((1, 1), dtype=np.float32) * 3.
      kernel = variable_scope.get_variable(
          'w', initializer=array_ops.constant(initial_weights))
      offset = variable_scope.get_variable(
          'b', initializer=init_ops.zeros_initializer(), shape=(1, 1))
      preactivation = math_ops.matmul(features, kernel) + offset

      registry.register_fully_connected(
          (kernel, offset), features, preactivation)

      logits = math_ops.tanh(preactivation)
      labels = array_ops.constant([[0.], [1.]])
      # The loss op is added to the graph but is not consumed by this test.
      per_example_loss = nn.softmax_cross_entropy_with_logits(
          logits=logits, labels=labels)
      unused_loss = math_ops.reduce_mean(per_example_loss)

      registry.register_categorical_predictive_distribution(logits)

      optimizer.KfacOptimizer(
          0.1,
          0.2,
          0.3,
          registry,
          momentum=0.5,
          momentum_type='regular')
Example #29
0
  def _project_input(self, inputs, c_prev, m_prev, with_c):
    """Fills in c_prev and m_prev with projected input, for input dimensions
    """
    conf = self._config

    if (inputs is not None and inputs.get_shape().with_rank(2)[1].value > 0
        and len(conf.inputs) > 0):
      if isinstance(inputs, tuple):
        if len(conf.inputs) != len(inputs):
          raise ValueError("Expect inputs as a tuple of {} "
                           "tensors".format(len(conf.inputs)))
        input_splits = inputs
      else:
        input_splits = array_ops.split(
          value=inputs, num_or_size_splits=len(conf.inputs), axis=1)
      input_sz = input_splits[0].get_shape().with_rank(2)[1].value

      for i, j in enumerate(conf.inputs):
        input_project_m = vs.get_variable(
          'project_m_{}'.format(j), [input_sz, conf.num_units],
          dtype=inputs.dtype)
        m_prev[j] = math_ops.matmul(input_splits[i], input_project_m)

        if with_c:
          input_project_c = vs.get_variable(
            'project_c_{}'.format(j), [input_sz, conf.num_units],
            dtype=inputs.dtype)
          c_prev[j] = math_ops.matmul(input_splits[i], input_project_c)
Example #30
0
def weighted_moving_average(value,
                            decay,
                            weight,
                            truediv=True,
                            collections=None,
                            name=None):
  """Builds an op computing the weighted moving average of `value`.

  The result is
    `moving_average(value * weight) / moving_average(weight)`,
  where each moving average follows
    `new_value = decay * old_value + (1 - decay) * update`.
  Two internal, non-trainable variables hold the running averages of
  `value * weight` and of `weight`; no user-visible variable is modified,
  which is why the signature differs from `assign_moving_average`.

  Args:
    value: A numeric `Tensor`.
    decay: A float `Tensor` or float value.  The moving average decay.
    weight:  `Tensor` that keeps the current value of a weight.
      Shape should be able to multiply `value`.
    truediv:  Boolean, if `True`, dividing by `moving_average(weight)` is
      floating point division.  If `False`, use division implied by dtypes.
    collections:  List of graph collections keys to add the internal variables
      `value * weight` and `weight` to.
      Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
    name: Optional name of the returned operation.
      Defaults to "WeightedMovingAvg".

  Returns:
    An Operation that updates and returns the weighted moving average.
  """
  if collections is None:
    collections = [ops.GraphKeys.GLOBAL_VARIABLES]

  with variable_scope.variable_scope(name, "WeightedMovingAvg",
                                     [value, weight, decay]) as scope:
    # Running average of `value * weight`, starting from zeros.
    weighted_sum_init = init_ops.zeros_initializer(value.get_shape(),
                                                   dtype=value.dtype)
    weighted_sum_var = variable_scope.get_variable(
        "value_x_weight",
        initializer=weighted_sum_init,
        trainable=False,
        collections=collections)

    # Running average of `weight`, starting from zeros.
    weight_sum_init = init_ops.zeros_initializer(weight.get_shape(),
                                                 dtype=weight.dtype)
    weight_sum_var = variable_scope.get_variable(
        "weight",
        initializer=weight_sum_init,
        trainable=False,
        collections=collections)

    weighted_value = value * weight
    numerator = assign_moving_average(
        weighted_sum_var, weighted_value, decay, zero_debias=False)
    denominator = assign_moving_average(
        weight_sum_var, weight, decay, zero_debias=False)

    # Only the selected division function is looked up.
    divide = math_ops.truediv if truediv else math_ops.div
    return divide(numerator, denominator, name=scope.name)
Example #31
0
def weighted_moving_average(value,
                            decay,
                            weight,
                            truediv=True,
                            collections=None,
                            name=None):
    """Builds an op computing the weighted moving average of `value`.

    The result is
      `moving_average(value * weight) / moving_average(weight)`,
    where each moving average follows
      `new_value = decay * old_value + (1 - decay) * update`.
    Two internal, non-trainable variables hold the running averages of
    `value * weight` and of `weight`; no user-visible variable is modified,
    which is why the signature differs from `assign_moving_average`.

    Args:
      value: A numeric `Tensor`.
      decay: A float `Tensor` or float value.  The moving average decay.
      weight:  `Tensor` that keeps the current value of a weight. Shape should
        be able to multiply `value`.
      truediv:  Boolean, if `True`, dividing by `moving_average(weight)` is
        floating point division.  If `False`, use division implied by dtypes.
      collections:  List of graph collections keys to add the internal
        variables `value * weight` and `weight` to. Defaults to
        `[GraphKeys.GLOBAL_VARIABLES]`.
      name: Optional name of the returned operation. Defaults to
        "WeightedMovingAvg".

    Returns:
      An Operation that updates and returns the weighted moving average.
    """
    if collections is None:
        collections = [ops.GraphKeys.GLOBAL_VARIABLES]

    def _zero_accumulator(var_name, like):
        """Creates a non-trainable zero variable shaped and typed like `like`."""
        return variable_scope.get_variable(
            var_name,
            shape=like.get_shape(),
            dtype=like.dtype,
            initializer=init_ops.zeros_initializer(),
            trainable=False,
            collections=collections)

    with variable_scope.variable_scope(name, "WeightedMovingAvg",
                                       [value, weight, decay]) as scope:
        weighted_sum_var = _zero_accumulator("value_x_weight", value)
        weight_sum_var = _zero_accumulator("weight", weight)

        weighted_value = value * weight
        numerator = assign_moving_average(
            weighted_sum_var, weighted_value, decay, zero_debias=False)
        denominator = assign_moving_average(
            weight_sum_var, weight, decay, zero_debias=False)

        if not truediv:
            return math_ops.divide(numerator, denominator, name=scope.name)
        return math_ops.truediv(numerator, denominator, name=scope.name)
def variable_scoped_function():
  """Returns the zero-initialized, shape-[1] variable named "dummy".

  The variable is requested through `variable_scope.get_variable`, so it is
  resolved relative to whatever variable scope is active at call time.
  """
  zeros = init_ops.zeros_initializer()
  dummy = variable_scope.get_variable("dummy", shape=[1], initializer=zeros)
  return dummy
Example #33
0
 def call(self, inputs):
   """Requests a 2x2 zero-initialized variable and passes `inputs` through.

   The variable `'my_call_var'` is requested only for its side effect; its
   value is not used and `inputs` is returned unchanged.
   """
   zeros = init_ops.zeros_initializer()
   variable_scope.get_variable('my_call_var', [2, 2], initializer=zeros)
   return inputs
Example #34
0
 def build(self, input_shape):
   """Stores a 2x2 zero-initialized variable `'my_var'` on `self.my_var`.

   `input_shape` is accepted for interface compatibility but is not used.
   """
   zeros = init_ops.zeros_initializer()
   created = variable_scope.get_variable('my_var', [2, 2], initializer=zeros)
   self.my_var = created
Example #35
0
def legacy_convolution2d(x,
                         num_output_channels,
                         kernel_size,
                         activation_fn=None,
                         stride=(1, 1),
                         padding='SAME',
                         weight_init=initializers.xavier_initializer_conv2d(),
                         bias_init=standard_ops.zeros_initializer,
                         name=None,
                         weight_collections=(ops.GraphKeys.WEIGHTS, ),
                         bias_collections=(ops.GraphKeys.BIASES, ),
                         output_collections=(ops.GraphKeys.ACTIVATIONS, ),
                         trainable=True,
                         weight_regularizer=None,
                         bias_regularizer=None):
    """Adds the parameters for a conv2d layer and returns the output.

    A neural network convolution layer is generally defined as
    `y = f(conv2d(w, x) + b)` where `f` is given by `activation_fn`,
    `conv2d` is `tf.nn.conv2d` and `x` has shape
    `[batch, height, width, channels]`. The output of this op is of shape
    `[batch, out_height, out_width, num_output_channels]`, where `out_width`
    and `out_height` are determined by the `padding` argument. See `conv2D`
    for details.

    This op creates `w` (named 'weights') and optionally `b` (named 'bias').
    Bias can be disabled by setting `bias_init` to `None`.

    The variable creation is compatible with `tf.variable_scope` and so can be
    reused with `tf.variable_scope` or `tf.make_template`.

    Most of the details of variable creation can be controlled by specifying
    the initializers (`weight_init` and `bias_init`) and which collections to
    place the created variables in (`weight_collections` and
    `bias_collections`). Both variables are always also added to the
    `GraphKeys.VARIABLES` collection.

    Per layer regularization can be specified with `weight_regularizer` and
    `bias_regularizer`, which are forwarded to the respective variable.

    Args:
      x: A 4-D input `Tensor`.
      num_output_channels: The number of output channels (i.e. the size of the
        last dimension of the output).
      kernel_size: A length 2 `list` or `tuple` containing the kernel size.
      activation_fn: A function that requires a single Tensor that is applied
        as a non-linearity.
      stride: A length 2 `list` or `tuple` specifying the stride of the sliding
        window across the image.
      padding: A `string` from: "SAME", "VALID". The type of padding algorithm
        to use.
      weight_init: An optional initialization. If not specified, uses Xavier
        initialization (see `tf.learn.xavier_initializer`).
      bias_init: An initializer for the bias, defaults to 0. Set to `None` in
        order to disable bias.
      name: The name for this operation is used to name operations and to find
        variables. If specified it must be unique for this scope, otherwise a
        unique name starting with "convolution2d" will be created.  See
        `tf.variable_op_scope` for details.
      weight_collections: List of graph collections to which weights are added.
      bias_collections: List of graph collections to which biases are added.
      output_collections: List of graph collections to which outputs are added
        (forwarded to `_apply_activation`).
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      weight_regularizer: A regularizer like the result of
        `l1_regularizer` or `l2_regularizer`. Used for weights.
      bias_regularizer: A regularizer like the result of
        `l1_regularizer` or `l2_regularizer`. Used for biases.

    Returns:
      The result of applying a 2-D convolutional layer.

    Raises:
      ValueError: If `kernel_size` or `stride` are not length 2.
    """
    # TODO(ptucker) redirect to convolution2d
    #   _ = trainable
    #   variables_collections = {'weights': weight_collections,
    #                            'biases': bias_collections}
    #   outputs = convolution2d(inputs=x,
    #                           num_outputs=num_output_channels,
    #                           kernel_size=kernel_size,
    #                           stride=stride,
    #                           padding=padding,
    #                           activation_fn=activation_fn,
    #                           weights_initializer=weight_init,
    #                           weights_regularizer=weight_regularizer,
    #                           biases_initializer=bias_init,
    #                           biases_regularizer=bias_regularizer,
    #                           variables_collections=variables_collections,
    #                           scope=name)
    #   ops.add_to_collections(output_collections, outputs)
    #   return outputs
    with variable_scope.variable_op_scope([x], name, 'convolution2d'):
        num_input_channels = x.get_shape().dims[3].value

        # Report the offending length. Formatting the sequence itself with
        # `%d` raises TypeError (or prints an element) instead of producing
        # the intended ValueError.
        if len(kernel_size) != 2:
            raise ValueError(
                'kernel_size must be length 2: %d ' % len(kernel_size))
        if len(stride) != 2:
            raise ValueError('stride must be length 2: %d' % len(stride))

        # conv2d expects a stride entry for every dimension of the input.
        stride = [1, stride[0], stride[1], 1]
        shape = [
            kernel_size[0], kernel_size[1], num_input_channels,
            num_output_channels
        ]
        dtype = x.dtype.base_dtype

        # Variables always land in VARIABLES in addition to caller's choices.
        weight_collections = set(
            list(weight_collections or []) + [ops.GraphKeys.VARIABLES])
        w = variable_scope.get_variable('weights',
                                        shape=shape,
                                        dtype=dtype,
                                        initializer=weight_init,
                                        collections=weight_collections,
                                        regularizer=weight_regularizer,
                                        trainable=trainable)

        y = nn.conv2d(x, w, stride, padding)

        if bias_init is not None:
            bias_collections = set(
                list(bias_collections or []) + [ops.GraphKeys.VARIABLES])
            b = variable_scope.get_variable('bias',
                                            shape=[num_output_channels],
                                            dtype=dtype,
                                            initializer=bias_init,
                                            collections=bias_collections,
                                            regularizer=bias_regularizer,
                                            trainable=trainable)

            y = nn.bias_add(y, b)

        return _apply_activation(y, activation_fn, output_collections)
Example #36
0
def legacy_fully_connected(x,
                           num_output_units,
                           activation_fn=None,
                           weight_init=initializers.xavier_initializer(),
                           bias_init=init_ops.zeros_initializer,
                           name=None,
                           weight_collections=(ops.GraphKeys.WEIGHTS, ),
                           bias_collections=(ops.GraphKeys.BIASES, ),
                           output_collections=(ops.GraphKeys.ACTIVATIONS, ),
                           trainable=True,
                           weight_regularizer=None,
                           bias_regularizer=None):
    """Adds the parameters for a fully connected layer and returns the output.

    A fully connected layer is generally defined as a matrix multiply:
    `y = f(w * x + b)` where `f` is given by `activation_fn`. If
    `activation_fn` is `None`, the result of `y = w * x + b` is returned.

    If `x` has shape `[dim_0, dim_1, ..., dim_n]` with more than 2 dimensions,
    the matrix multiply is repeated along the leading dimensions: `x` is
    reshaped to the 2-D shape `[dim_0 * ... * dim_{n-1}, dim_n]` before the
    multiply, and the result is reshaped afterwards to
    `[dim_0, ..., dim_{n-1}, num_output_units]`.

    This op creates `w` (named 'weights') and optionally `b` (named 'bias').
    Bias can be disabled by setting `bias_init` to `None`.

    The variable creation is compatible with `tf.variable_scope` and so can be
    reused with `tf.variable_scope` or `tf.make_template`.

    Most of the details of variable creation can be controlled by specifying
    the initializers (`weight_init` and `bias_init`) and in which collections
    to place the created variables (`weight_collections` and
    `bias_collections`; note that the variables are always added to the
    `VARIABLES` collection). The collections arguments default to `WEIGHTS`,
    `BIASES` and `ACTIVATIONS`, respectively.

    Per layer regularization can be specified with `weight_regularizer` and
    `bias_regularizer`, which are forwarded to the respective variable.

    Args:
      x: The input `Tensor`.
      num_output_units: The size of the output.
      activation_fn: A function that requires a single Tensor that is applied
        as a non-linearity. If None is used, do not apply any activation.
      weight_init: An optional weight initialization, defaults to
        `xavier_initializer`.
      bias_init: An initializer for the bias, defaults to 0. Set to `None` in
        order to disable bias.
      name: The name for this operation is used to name operations and to find
        variables. If specified it must be unique for this scope, otherwise a
        unique name starting with "fully_connected" will be created.  See
        `tf.variable_op_scope` for details.
      weight_collections: List of graph collections to which weights are added.
      bias_collections: List of graph collections to which biases are added.
      output_collections: List of graph collections to which outputs are added
        (forwarded to `_apply_activation`).
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      weight_regularizer: A regularizer like the result of
        `l1_regularizer` or `l2_regularizer`. Used for weights.
      bias_regularizer: A regularizer like the result of
        `l1_regularizer` or `l2_regularizer`. Used for biases.

    Returns:
      The output of the fully connected layer.

    Raises:
      ValueError: if x has rank less than 2 or if its last dimension is not
        set.
    """
    # TODO(ptucker) redirect to fully_connected: ignore `trainable`, pass
    # {'weights': weight_collections, 'biases': bias_collections} as
    # `variables_collections`, forward the remaining arguments under their
    # fully_connected names (inputs, num_outputs, weights_initializer,
    # weights_regularizer, biases_initializer, biases_regularizer, scope),
    # then add the outputs to `output_collections` and return them.
    with variable_scope.variable_op_scope([x], name, 'fully_connected'):
        x_dims = x.get_shape().dims
        if x_dims is None:
            raise ValueError('dims of x must be known but is None')
        rank = len(x_dims)
        if rank < 2:
            raise ValueError('rank of x must be at least 2 not: %d' % rank)
        fan_in = x_dims[-1].value
        if fan_in is None:
            raise ValueError('last dimension of x must be known but is None')

        # Inputs of rank > 2 are flattened to 2-D for the matmul and restored
        # to their leading dimensions afterwards.
        needs_flatten = rank > 2
        param_dtype = x.dtype.base_dtype

        weight_keys = list(weight_collections or [])
        weight_keys.append(ops.GraphKeys.VARIABLES)
        weights = variable_scope.get_variable(
            'weights',
            shape=[fan_in, num_output_units],
            dtype=param_dtype,
            initializer=weight_init,
            collections=set(weight_keys),
            regularizer=weight_regularizer,
            trainable=trainable)

        if needs_flatten:
            flat_x = array_ops.reshape(x, [-1, fan_in])
        else:
            flat_x = x
        result = standard_ops.matmul(flat_x, weights)

        if bias_init is not None:
            bias_keys = list(bias_collections or [])
            bias_keys.append(ops.GraphKeys.VARIABLES)
            bias = variable_scope.get_variable(
                'bias',
                shape=[num_output_units],
                dtype=param_dtype,
                initializer=bias_init,
                collections=set(bias_keys),
                regularizer=bias_regularizer,
                trainable=trainable)
            result = nn.bias_add(result, bias)

        if needs_flatten:
            # Dynamic shape: reuse the runtime dimensions of x, replacing the
            # last one with the layer's output size.
            runtime_dims = array_ops.unpack(array_ops.shape(x))
            runtime_dims[-1] = num_output_units
            result = array_ops.reshape(result, array_ops.pack(runtime_dims))

            # Static shape: same substitution on the statically known shape.
            known_dims = x.get_shape().as_list()
            known_dims[-1] = num_output_units
            result.set_shape(known_dims)

        return _apply_activation(result, activation_fn, output_collections)
Example #37
0
def _zero_debias(strategy, unbiased_var, value, decay):
    """Builds the update that keeps `unbiased_var` a zero-debiased EMA.

    Exponential moving averages that start at 0 are biased towards 0. For an
    EMA initialized to `0` with decay `b` that has seen the constant `c` for
    `t` steps:

    ```
      EMA = 0*b^(t) + c*(1 - b)*b^(t-1) + c*(1 - b)*b^(t-2) + ...
          = c*(1 - b^t)
    ```

    so dividing by the scale factor `1 - b^t` recovers `c`, as in
    (Kingma et al., 2015). Two shadow variables are used for this: `biased`
    holds the biased estimate and `local_step` counts the updates that have
    occurred. Both are created under a variable scope named after
    `unbiased_var` and colocated with it.

    Args:
      strategy: `Strategy` used to create and update variables.
      unbiased_var: A Variable representing the current value of the unbiased
        EMA.
      value: A Tensor representing the most recent value.
      decay: A Tensor representing `1-decay` for the EMA.

    Returns:
      The result of `strategy.extended.update` applied to `unbiased_var`.
      Computing it also updates the shadow variables.

    References:
      Adam - A Method for Stochastic Optimization:
        [Kingma et al., 2015](https://arxiv.org/abs/1412.6980)
        ([pdf](https://arxiv.org/pdf/1412.6980.pdf))
    """
    # Drop the trailing ":0" from the variable name to get a scope name.
    shadow_scope_name = unbiased_var.name[:-len(":0")]
    with variable_scope.variable_scope(shadow_scope_name,
                                       values=[unbiased_var, value, decay]):
        with ops.init_scope():
            biased_initializer = init_ops.zeros_initializer()
            local_step_initializer = init_ops.zeros_initializer()

        def _maybe_get_unique(name):
            """Get name for a unique variable, if not `reuse=True`."""
            current_scope = variable_scope.get_variable_scope()
            if current_scope.reuse:
                return name
            taken = [v.op.name for v in current_scope.global_variables()]
            qualified = current_scope.name + "/" + name
            if qualified not in taken:
                return name
            # Probe "_1", "_2", ... until an unused suffix is found.
            suffix = 1
            while qualified + ("_%d" % suffix) in taken:
                suffix += 1
            return name + ("_%d" % suffix)

        with strategy.extended.colocate_vars_with(unbiased_var):
            biased_name = _maybe_get_unique("biased")
            biased_var = variable_scope.get_variable(
                biased_name,
                initializer=biased_initializer,
                shape=unbiased_var.get_shape(),
                dtype=unbiased_var.dtype,
                trainable=False)
            local_step_name = _maybe_get_unique("local_step")
            local_step = variable_scope.get_variable(
                local_step_name,
                shape=[],
                dtype=unbiased_var.dtype,
                initializer=local_step_initializer,
                trainable=False)

    def update_fn(v, value, biased_var, local_step):
        """Advances the shadow variables and assigns the debiased value to `v`."""
        biased_delta = (biased_var - value) * decay
        update_biased = state_ops.assign_sub(biased_var, biased_delta)
        update_local_step = local_step.assign_add(1)

        # This function gets `1 - decay`, so use `1.0 - decay` in the exponent.
        bias_factor = 1 - math_ops.pow(1.0 - decay, update_local_step)
        return state_ops.assign(v,
                                update_biased / bias_factor,
                                name=ops.get_name_scope() + "/")

    shadow_args = (value, biased_var, local_step)
    return strategy.extended.update(unbiased_var, update_fn, args=shadow_args)
Example #38
0
def _linear(args,
            output_size,
            bias,
            bias_initializer=None,
            kernel_initializer=None):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.
    Args:
        args: a 2D Tensor or a list of 2D, batch x n, Tensors.
        output_size: int, second dimension of W[i].
        bias: boolean, whether to add a bias term or not.
        bias_initializer: starting value to initialize the bias
        (default is all zeros).
        kernel_initializer: starting value to initialize the weight.
    Returns:
        A 2D Tensor with shape [batch x output_size] equal to
        sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
    Raises:
        ValueError: if some of the arguments has unspecified or wrong shape.
    """
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape() for a in args]
    for shape in shapes:
        if (shape.ndims != 2):
            raise ValueError("linear is expecting 2D arguments: %s" % shapes)
        if shape[1].value is None:
            raise ValueError(
                "linear expects shape[1] to be provided for shape %s, "
                "but saw %s" % (shape, shape[1]))
        else:
            total_arg_size += shape[1].value

    dtype = [a.dtype for a in args][0]

    # Now the computation.
    scope = vs.get_variable_scope()
    with vs.variable_scope(scope) as outer_scope:
        weights = vs.get_variable(_WEIGHTS_VARIABLE_NAME,
                                  [total_arg_size, output_size],
                                  dtype=dtype,
                                  initializer=kernel_initializer)
        if len(args) == 1:
            res = math_ops.matmul(args[0], weights)
        else:
            res = math_ops.matmul(array_ops.concat(args, 1), weights)
        if not bias:
            return res
        with vs.variable_scope(outer_scope) as inner_scope:
            inner_scope.set_partitioner(None)
            if bias_initializer is None:
                bias_initializer = init_ops.constant_initializer(0.0,
                                                                 dtype=dtype)
            biases = vs.get_variable(_BIAS_VARIABLE_NAME, [output_size],
                                     dtype=dtype,
                                     initializer=bias_initializer)
        return nn_ops.bias_add(res, biases)
Example #39
0
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  clip_gradients=None,
                  moving_average_decay=0.9,
                  learning_rate_decay_fn=None,
                  variables=None):
    """Build a training op that minimizes `loss` with the given optimizer.

    Args:
      loss: Tensor, 0 dimensional.
      global_step: Tensor, step counter; handed to `learning_rate_decay_fn`
        and to the optimizer's `apply_gradients`.
      learning_rate: float or 0-d Tensor. A float is stored in a
        non-trainable variable named "learning_rate".
      optimizer: one of
        * a string key of OPTIMIZER_CLS_NAMES,
        * a subclass of `optimizer_.Optimizer`, constructed with
          `learning_rate=`,
        * an `optimizer_.Optimizer` instance, used as is.
      gradient_noise_scale: float or None; when set, the gradients are passed
        through `_add_scaled_noise_to_gradients` with this scale.
      clip_gradients: float or None; when set, the gradients are clipped with
        `clip_ops.clip_by_global_norm` using this value.
      moving_average_decay: float or None; when set, an exponential moving
        average of the loss is maintained and summarized as "loss/mean".
      learning_rate_decay_fn: optional function taking the learning rate and
        `global_step` and returning the decayed learning rate Tensor.
      variables: list of variables to optimize, or None to use all trainable
        variables.

    Returns:
      The loss Tensor, checked for inf/nan, carrying a control dependency on
      the gradient update op.

    Raises:
      ValueError: if `learning_rate` or `optimizer` has an unsupported type,
        or `optimizer` is a string that is not in OPTIMIZER_CLS_NAMES.
    """
    # Track a decayed moving average of the loss and make the loss depend on
    # its update op.
    if moving_average_decay is not None:
        ema = train.ExponentialMovingAverage(moving_average_decay, name="avg")
        ema_update = ema.apply([loss])
        logging_ops.scalar_summary("loss/mean", ema.average(loss))
        loss = control_flow_ops.with_dependencies([ema_update], loss)

    # Resolve the learning rate into a scalar tensor/variable.
    is_scalar_tensor = (isinstance(learning_rate, ops.Tensor) and
                        len(learning_rate.get_shape()) == 0)
    if is_scalar_tensor:
        lr = learning_rate
    elif isinstance(learning_rate, float):
        lr_initializer = init_ops.constant_initializer(learning_rate)
        lr = vs.get_variable("learning_rate", [],
                             trainable=False,
                             initializer=lr_initializer)
    else:
        raise ValueError("Learning rate should be 0d Tensor or float. Got %s" %
                         str(learning_rate))
    if learning_rate_decay_fn is not None:
        lr = learning_rate_decay_fn(lr, global_step)

    # Resolve the optimizer: by name, by class, or an instance used directly.
    if isinstance(optimizer, six.string_types):
        if optimizer not in OPTIMIZER_CLS_NAMES:
            raise ValueError(
                "Optimizer name should be one of [%s], you provided %s." %
                (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
        optimizer_cls = OPTIMIZER_CLS_NAMES[optimizer]
        opt = optimizer_cls(learning_rate=lr)
    elif isinstance(optimizer, type) and issubclass(optimizer,
                                                    optimizer_.Optimizer):
        opt = optimizer(learning_rate=lr)
    elif isinstance(optimizer, optimizer_.Optimizer):
        opt = optimizer
    else:
        raise ValueError("Unrecognized optimizer: should be string, "
                         "subclass of Optimizer or instance of "
                         "subclass of Optimizer. Got %s." % str(optimizer))

    # Fall back to every trainable variable when none were given.
    if variables is None:
        variables = vars_.trainable_variables()

    gradients = opt.compute_gradients(loss, variables)

    if gradient_noise_scale is not None:
        gradients = _add_scaled_noise_to_gradients(gradients,
                                                   gradient_noise_scale)

    if clip_gradients is not None:
        # Split the pairs, clip all gradients together, then re-pair them.
        grads_only, grad_vars = zip(*gradients)
        clipped, _ = clip_ops.clip_by_global_norm(grads_only, clip_gradients)
        gradients = list(zip(clipped, grad_vars))

    logging_ops.scalar_summary("loss", loss)

    # Histograms for each variable, its gradient and the gradient's norm.
    for grad, var in gradients:
        grad_values = (grad.values
                       if isinstance(grad, ops.IndexedSlices) else grad)
        if grad_values is None:
            continue
        logging_ops.histogram_summary(var.name, var)
        logging_ops.histogram_summary(var.name + "/gradients", grad_values)
        logging_ops.histogram_summary(var.name + "/gradient_norm",
                                      clip_ops.global_norm([grad_values]))

    grad_updates = opt.apply_gradients(gradients,
                                       global_step=global_step,
                                       name="train")

    # Fail loudly on a non-finite loss, and tie the returned tensor to the
    # update op so that evaluating it performs a training step.
    checked_loss = array_ops.check_numerics(loss, "Loss is inf or nan")
    return control_flow_ops.with_dependencies([grad_updates], checked_loss)
Example #40
0
    def __call__(self, inputs, state, scope=None):
        """Advance the attention-tape LSTM by one step.

        Args:
          inputs: 2-D input Tensor; its second dimension must be statically
            known.
          state: 4-tuple `(a, h_summary, c_tape, h_tape)`. The first entry is
            unpacked but not read here. Both tapes are reshaped to
            `[-1, attn_length, num_units]` before use.
          scope: variable scope for the created variables; the class name is
            used when this is falsy.

        Returns:
          A pair `(h, new_state)` where `h` is the new hidden output and
          `new_state` is `(a, new_h_summary, new_c_tape, new_h_tape)`; `a` and
          `new_h_summary` come from `self._attention`, and both tapes are
          flattened to `[-1, attn_length * num_units]`.

        Raises:
          ValueError: If the second dimension of `inputs` is unknown.
        """
        _, prev_h_summary, prev_c_tape, prev_h_tape = state

        dtype = inputs.dtype
        input_dim = inputs.get_shape().with_rank(2)[1].value
        if input_dim is None:
            raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

        units = self._num_units
        cell_scope = scope or type(self).__name__
        with vs.variable_scope(cell_scope, initializer=self._initializer):
            kernel = rnn_cell._get_concat_variable(
                "W", [input_dim + units, 4 * units], dtype, 1)
            bias = vs.get_variable(
                "Bias", shape=[4 * units],
                initializer=array_ops.zeros_initializer, dtype=dtype)

            # View the flat tapes as [batch, attn_length, num_units].
            prev_c_tape = array_ops.reshape(
                prev_c_tape, [-1, self._attn_length, units])
            prev_h_tape = array_ops.reshape(
                prev_h_tape, [-1, self._attn_length, units])

            a, c_summary, h_summary = self._attention(
                inputs, prev_h_summary, prev_c_tape, prev_h_tape)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            gate_inputs = array_ops.concat(1, [inputs, h_summary])
            gates = tf.nn.bias_add(math_ops.matmul(gate_inputs, kernel), bias)
            i, j, f, o = array_ops.split(1, 4, gates)

            if self._use_peepholes:
                # Diagonal (peephole) connections.
                w_f_diag = vs.get_variable(
                    "W_F_diag", shape=[units], dtype=dtype)
                w_i_diag = vs.get_variable(
                    "W_I_diag", shape=[units], dtype=dtype)
                w_o_diag = vs.get_variable(
                    "W_O_diag", shape=[units], dtype=dtype)
                kept = sigmoid(
                    f + self._forget_bias + w_f_diag * c_summary) * c_summary
                written = sigmoid(i + w_i_diag * c_summary) * self._activation(j)
            else:
                kept = sigmoid(f + self._forget_bias) * c_summary
                written = sigmoid(i) * self._activation(j)
            c = kept + written

            if self._cell_clip is not None:
                c = tf.clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)

            out_gate_input = o + w_o_diag * c if self._use_peepholes else o
            h = sigmoid(out_gate_input) * self._activation(c)

            # Drop the oldest tape entry ...
            h_tape_tail = array_ops.slice(prev_h_tape, [0, 1, 0], [-1, -1, -1])
            c_tape_tail = array_ops.slice(prev_c_tape, [0, 1, 0], [-1, -1, -1])

            # ... append the freshly computed c and h ...
            c_tape_3d = array_ops.concat(
                1, [c_tape_tail, array_ops.expand_dims(c, 1)])
            h_tape_3d = array_ops.concat(
                1, [h_tape_tail, array_ops.expand_dims(h, 1)])

            # ... and flatten both tapes back to 2-D.
            new_c_tape = array_ops.reshape(
                c_tape_3d, [-1, self._attn_length * units])
            new_h_tape = array_ops.reshape(
                h_tape_3d, [-1, self._attn_length * units])

            return h, (a, h_summary, new_c_tape, new_h_tape)
Example #41
0
    def __call__(self, inputs, state, scope=None):
        """Compute one LSTM step.

        Args:
          inputs: 2-D input Tensor whose second dimension is statically known.
          state: previous state. When `self._state_is_tuple` is true it is
            unpacked as a `(c, m)` pair; otherwise it is one 2-D Tensor whose
            first `num_units` columns hold `c` and whose following `num_proj`
            columns (`num_units` when no projection is configured) hold `m`.
          scope: variable scope for the created variables; the class name is
            used when this is falsy.

        Returns:
          A pair `(m, new_state)`: `m` is the cell output (projected when
          `num_proj` is set) and `new_state` is `LSTMStateTuple(c, m)` or the
          column-wise concatenation of `c` and `m`, matching the layout of
          `state`.

        Raises:
          ValueError: If the second dimension of `inputs` is unknown.
        """
        units = self._num_units
        proj_size = units if self._num_proj is None else self._num_proj

        if self._state_is_tuple:
            c_prev, m_prev = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, units])
            m_prev = array_ops.slice(state, [0, units], [-1, proj_size])

        dtype = inputs.dtype
        input_dim = inputs.get_shape().with_rank(2)[1].value
        if input_dim is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        peepholes = self._use_peepholes
        cell_scope = scope or type(self).__name__
        with vs.variable_scope(cell_scope, initializer=self._initializer):
            kernel = _get_concat_variable(
                "W", [input_dim + proj_size, 4 * units], dtype,
                self._num_unit_shards)
            bias = vs.get_variable("B",
                                   shape=[4 * units],
                                   initializer=init_ops.zeros_initializer,
                                   dtype=dtype)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            gate_inputs = array_ops.concat(1, [inputs, m_prev])
            gates = nn_ops.bias_add(math_ops.matmul(gate_inputs, kernel), bias)
            i, j, f, o = array_ops.split(1, 4, gates)

            if peepholes:
                # Diagonal (peephole) connections.
                w_f_diag = vs.get_variable("W_F_diag", shape=[units],
                                           dtype=dtype)
                w_i_diag = vs.get_variable("W_I_diag", shape=[units],
                                           dtype=dtype)
                w_o_diag = vs.get_variable("W_O_diag", shape=[units],
                                           dtype=dtype)
                kept = sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev
                written = sigmoid(i + w_i_diag * c_prev) * self._activation(j)
            else:
                kept = sigmoid(f + self._forget_bias) * c_prev
                written = sigmoid(i) * self._activation(j)
            c = kept + written

            if self._cell_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                c = clip_ops.clip_by_value(c, -self._cell_clip,
                                           self._cell_clip)
                # pylint: enable=invalid-unary-operand-type

            out_gate_input = o + w_o_diag * c if peepholes else o
            m = sigmoid(out_gate_input) * self._activation(c)

            if self._num_proj is not None:
                proj_kernel = _get_concat_variable(
                    "W_P", [units, self._num_proj], dtype,
                    self._num_proj_shards)
                m = math_ops.matmul(m, proj_kernel)

                if self._proj_clip is not None:
                    # pylint: disable=invalid-unary-operand-type
                    m = clip_ops.clip_by_value(m, -self._proj_clip,
                                               self._proj_clip)
                    # pylint: enable=invalid-unary-operand-type

        # Pack the new state in the same layout the caller supplied.
        if self._state_is_tuple:
            new_state = LSTMStateTuple(c, m)
        else:
            new_state = array_ops.concat(1, [c, m])
        return m, new_state
Example #42
0
    def testInitFromPartitionVar(self):
        # Writes a partitioned checkpoint, then restores it into freshly built
        # partitioned variables in two separate graphs and compares values.
        checkpoint_dir = self.get_temp_dir()
        with self.cached_session() as session:
            v1 = _create_partition_checkpoints(session, checkpoint_dir)

        # New graph and session.
        with ops.Graph().as_default() as g:
            with self.session(graph=g) as session:
                with variable_scope.variable_scope("some_scope"):
                    my1 = variable_scope.get_variable(
                        name="my1",
                        shape=[100, 100],
                        initializer=init_ops.zeros_initializer(),
                        partitioner=partitioned_variables.
                        min_max_variable_partitioner(max_partitions=5,
                                                     axis=0,
                                                     min_slice_size=8 << 10))
                    my1_var_list = my1._get_variable_list()
                # Create another variable with different partitions than the variable in
                # the checkpoint.
                with variable_scope.variable_scope("some_other_scope"):
                    my2 = variable_scope.get_variable(
                        name="var1",
                        shape=[100, 100],
                        initializer=init_ops.zeros_initializer(),
                        partitioner=partitioned_variables.
                        min_max_variable_partitioner(max_partitions=5,
                                                     axis=0,
                                                     min_slice_size=16 << 10))
                    my2_var_list = my2._get_variable_list()

                # Map by full variable name and by scope prefix.
                checkpoint_utils.init_from_checkpoint(
                    checkpoint_dir, {
                        "scope/var1": "some_scope/my1",
                        "scope/": "some_other_scope/"
                    })

                session.run(variables.global_variables_initializer())
                my1_values = session.run(my1_var_list)
                self.assertAllEqual(my1_values, v1)
                my2_values = session.run(my2_var_list)
                # Verify we created different number of partitions.
                # (assertNotEquals is a deprecated alias that was removed in
                # Python 3.12; assertNotEqual is the supported spelling.)
                self.assertNotEqual(len(my2_values), len(v1))
                # Verify the values were correctly initialized in spite of
                # different partitions.
                full_my2_values = np.concatenate(my2_values, axis=0)
                full_v1_values = np.concatenate(v1, axis=0)
                self.assertAllEqual(full_my2_values, full_v1_values)

        # New graph and session.
        with ops.Graph().as_default() as g:
            with self.session(graph=g) as session:
                with variable_scope.variable_scope("some_scope"):
                    my1 = variable_scope.get_variable(
                        name="my1",
                        shape=[100, 100],
                        initializer=init_ops.truncated_normal_initializer(0.5),
                        partitioner=partitioned_variables.
                        min_max_variable_partitioner(max_partitions=5,
                                                     axis=0,
                                                     min_slice_size=8 << 10))
                    my1_var_list = my1._get_variable_list()

                # This time the assignment map points at the list of variable
                # partitions instead of a variable name.
                checkpoint_utils.init_from_checkpoint(
                    checkpoint_dir, {
                        "scope/var1": my1_var_list,
                    })

                session.run(variables.global_variables_initializer())
                my1_values = session.run(my1_var_list)
                self.assertAllEqual(my1_values, v1)
Example #43
0
    def testResourceCountsAreCorrect(self):
        # Builds a two-layer linear model on "/device:IPU:0", runs five
        # gradient-descent steps and asserts that the report lists no
        # host-to-device or device-to-host event names, both after training
        # and after explicitly fetching w1 and b1.
        def const_init(values):
            # Constant initializer backed by a float32 array of `values`.
            return init_ops.constant_initializer(
                np.array(values, dtype=np.float32))

        training_rows = [
            [7, 3, 5, 9],
            [1, 2, 3, 4],
            [7, 3, 5, 9],
            [1, 2, 3, 4],
            [7, 3, 5, 9],
        ]

        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                with variable_scope.variable_scope("vs", use_resource=True):
                    w1 = variable_scope.get_variable(
                        "w1",
                        shape=[4, 2],
                        dtype=np.float32,
                        initializer=const_init(
                            [[1, 2], [3, 4], [5, 6], [7, 8]]))
                    b1 = variable_scope.get_variable(
                        "b1",
                        shape=[2],
                        dtype=np.float32,
                        trainable=False,
                        initializer=const_init([2, 3]))
                    w2 = variable_scope.get_variable(
                        "w2",
                        shape=[2, 2],
                        dtype=np.float32,
                        initializer=const_init([[1, 2], [3, 4]]))
                    b2 = variable_scope.get_variable(
                        "b2",
                        shape=[2],
                        dtype=np.float32,
                        trainable=False,
                        initializer=const_init([2, 3]))

                x = array_ops.placeholder(np.float32, shape=[1, 4])
                hidden = math_ops.matmul(x, w1) + b1
                y = math_ops.matmul(hidden, w2) + b2

                loss = math_ops.reduce_sum(y)
                optimizer = gradient_descent.GradientDescentOptimizer(0.1)
                train = optimizer.minimize(loss)

            report = tu.ReportJSON(self, sess)

            sess.run(variables.global_variables_initializer())

            # Start from a clean report so only the training steps are seen.
            report.reset()

            for row in training_rows:
                sess.run([train, loss],
                         {x: np.array([row], dtype=np.float32)})

            report.parse_log()
            report.assert_host_to_device_event_names([])
            report.assert_device_to_host_event_names([])

            # Explicitly fetch the first set of weights and biases
            sess.run([w1, b1])

            report.parse_log()
            report.assert_host_to_device_event_names([])
            report.assert_device_to_host_event_names([])
Example #44
0
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.platform import test
from tensorflow.python.tpu import tpu_feed


def create_test_xla_compile_context():
  """Return an `xla.XLACompileContext` with a uniquely named no-op pivot.

  The context name is made unique within the default graph, and the pivot op
  is named `<context name>/pivot`.
  """
  graph = ops.get_default_graph()
  name = graph.unique_name('computation')
  pivot_op = control_flow_ops.no_op(name=name + '/pivot')
  return xla.XLACompileContext(name=name, pivot=pivot_op)





# Module-level resource variable, initialized to 1; `func` below also assigns
# to it.
a = variable_scope.get_variable(name='variable_a', use_resource=True, initializer=1)

# Build an assign op on `a` while the test XLA compile context is entered,
# then exit the context again.
context = create_test_xla_compile_context()
context.Enter()
a.assign(2)
context.Exit()


@def_function.function
def func():
  """Assign 2 to the module-level variable `a` inside a test XLA context.

  Returns:
    The result of `a.assign(2)`, created between `Enter()` and `Exit()`.
  """
  compile_context = create_test_xla_compile_context()
  compile_context.Enter()
  assigned = a.assign(2)
  compile_context.Exit()
  return assigned
Example #45
0
def function_with_create(trainable):
    """Create a `variables.Variable` as a side effect and return "dummy".

    Args:
      trainable: forwarded as the `trainable` flag of the side-effect
        variable.

    Returns:
      The variable obtained from `get_variable("dummy", shape=[1])` with a
      zeros initializer.
    """
    # The constructed object is deliberately discarded; only its creation
    # matters here.
    variables.Variable(0, trainable=trainable)
    dummy = variable_scope.get_variable(
        "dummy", shape=[1], initializer=init_ops.zeros_initializer())
    return dummy
 def initialize_graph(self, input_statistics=None):
   """Run the parent graph initialization, then create the "prior" variable.

   Args:
     input_statistics: forwarded unchanged to the parent implementation.
   """
   super(StubTimeSeriesModel, self).initialize_graph(
       input_statistics=input_statistics)
   zero_init = init_ops.constant_initializer(0.)
   # Scalar (shape []) variable starting at zero.
   self.prior_var = variable_scope.get_variable(
       "prior", [], initializer=zero_init)
 def test_no_variable_sharing(self):
      # Creates a non-trainable resource variable "step_size" from a float32
      # scalar; the visible body makes no assertions.
      step_size_init = np.array(1e-5, np.float32)
      variable_scope.get_variable(name="step_size",
                                  initializer=step_size_init,
                                  use_resource=True,
                                  trainable=False)
Example #48
0
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None,
                  colocate_gradients_with_ops=False,
                  increment_global_step=True,
                  LARS_nu=None,
                  LARS_epsilon=1.0/16384.0,
                  loss_scale=1.0):
  """Given loss and parameters for optimizer, returns a training op.

  Various ways of passing optimizers include:

  - by string specifying the name of the optimizer. See OPTIMIZER_CLS_NAMES
      for full list. E.g. `optimize_loss(..., optimizer='Adam')`.
  - by function taking learning rate `Tensor` as argument and returning an
      `Optimizer` instance. E.g. `optimize_loss(...,
      optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5))`.
    Alternatively, if `learning_rate` is `None`, the function takes no
    arguments. E.g. `optimize_loss(..., learning_rate=None,
      optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5))`.
  - by a subclass of `Optimizer` having a single-argument constructor
      (the argument is the learning rate), such as AdamOptimizer or
      AdagradOptimizer. E.g. `optimize_loss(...,
      optimizer=tf.train.AdagradOptimizer)`.
  - by an instance of a subclass of `Optimizer`.
      E.g., `optimize_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`.

  Args:
    loss: Scalar `Tensor`.
    global_step: Scalar int `Tensor`, step counter to update on each step
                 unless `increment_global_step` is `False`. If not supplied,
                 it will be fetched from the default graph (see
                 `tf.train.get_global_step` for details). If it has
                 not been created, no step will be incremented with each weight
                 update. `learning_rate_decay_fn` requires `global_step`.
    learning_rate: float or `Tensor`, magnitude of update per each training
                   step. Can be `None`.
    optimizer: string, class or optimizer instance, used as trainer.
               string should be name of optimizer, like 'SGD',
                 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant.
               class should be sub-class of `tf.Optimizer` that implements
                 `compute_gradients` and `apply_gradients` functions.
               optimizer instance should be instantiation of `tf.Optimizer`
                 sub-class and have `compute_gradients` and `apply_gradients`
                 functions.
    gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this
                          value.
    gradient_multipliers: dict of variables or variable names to floats.
                          If present, gradients for specified
                          variables will be multiplied by given constant.
    clip_gradients: float, callable or `None`. If float, is provided, a global
      clipping is applied to prevent the norm of the gradient to exceed this
      value. Alternatively, a callable can be provided e.g.: adaptive_clipping.
      This callable takes a `list` of `(gradients, variables)` `tuple`s and
      returns the same thing with the gradients modified.
    learning_rate_decay_fn: function, takes `learning_rate` and `global_step`
                            `Tensor`s, returns `Tensor`.
                            Can be used to implement any learning rate decay
                            functions.
                            For example: `tf.train.exponential_decay`.
                            Ignored if `learning_rate` is not supplied.
    update_ops: list of update `Operation`s to execute at each step. If `None`,
                uses elements of UPDATE_OPS collection. The order of execution
                between `update_ops` and `loss` is non-deterministic.
    variables: list of variables to optimize or
               `None` to use all trainable variables.
    name: The name for this operation is used to scope operations and summaries.
    summaries: List of internal quantities to visualize on tensorboard. If not
               set, defaults to `["loss", "learning_rate",
               "global_gradient_norm"]`. The complete list is in
               OPTIMIZER_SUMMARIES.
    colocate_gradients_with_ops: If True, try colocating gradients with the
                                 corresponding op.
    increment_global_step: Whether to increment `global_step`. If your model
      calls `optimize_loss` multiple times per training step (e.g. to optimize
      different parts of the model), use this arg to avoid incrementing
      `global_step` more times than necessary.
    LARS_nu: If a float, LARS re-scaling will be applied
      https://arxiv.org/pdf/1708.03888.pdf with nu=LARS_nu. Any other value
      (including `None`) disables it.
    LARS_epsilon: If either weight or gradient norm is zero, this will be
      returned as local LR.
    loss_scale: float. When not 1.0, the loss is multiplied by this value
      before gradients are computed and the resulting gradients are multiplied
      by `1.0 / loss_scale` afterwards.

  Returns:
    Training op.

  Raises:
    ValueError: if:
        * `loss` is an invalid type or shape.
        * `global_step` is an invalid type or shape.
        * `learning_rate` is an invalid type or value.
        * `optimizer` has the wrong type.
        * `clip_gradients` is neither float nor callable.
        * `learning_rate` and `learning_rate_decay_fn` are supplied, but no
          `global_step` is available.
        * `gradients` is empty.
  """
  loss = ops.convert_to_tensor(loss)
  contrib_framework.assert_scalar(loss)
  if global_step is None:
    global_step = contrib_framework.get_global_step()
  else:
    contrib_framework.assert_global_step(global_step)
  with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
    # Update ops take UPDATE_OPS collection if not provided.
    if update_ops is None:
      update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
    # Make sure update ops are ran before computing loss.
    if update_ops:
      loss = control_flow_ops.with_dependencies(list(update_ops), loss)

    # Learning rate variable, with possible decay.
    lr = None
    if learning_rate is not None:
      if (isinstance(learning_rate, ops.Tensor) and
          learning_rate.get_shape().ndims == 0):
        lr = learning_rate
      elif isinstance(learning_rate, float):
        if learning_rate < 0.0:
          # Format the message explicitly; passing the value as a second
          # argument to ValueError would leave "%s" uninterpolated.
          raise ValueError("Invalid learning_rate %s." % learning_rate)
        lr = vs.get_variable(
            "learning_rate", [],
            trainable=False,
            initializer=init_ops.constant_initializer(learning_rate))
      else:
        raise ValueError("Learning rate should be 0d Tensor or float. "
                         "Got %s of type %s" % (str(learning_rate),
                                                str(type(learning_rate))))
    if summaries is None:
      summaries = ["loss", "learning_rate", "global_gradient_norm"]
    else:
      for summ in summaries:
        if summ not in OPTIMIZER_SUMMARIES:
          raise ValueError("Summaries should be one of [%s], you provided %s." %
                           (", ".join(OPTIMIZER_SUMMARIES), summ))
    if learning_rate is not None and learning_rate_decay_fn is not None:
      if global_step is None:
        raise ValueError("global_step is required for learning_rate_decay_fn.")
      lr = learning_rate_decay_fn(lr, global_step)
      if "learning_rate" in summaries:
        summary.scalar("learning_rate", lr)

    # Create optimizer, given specified parameters.
    if isinstance(optimizer, six.string_types):
      if lr is None:
        raise ValueError("Learning rate is None, but should be specified if "
                         "optimizer is string (%s)." % optimizer)
      if optimizer not in OPTIMIZER_CLS_NAMES:
        raise ValueError(
            "Optimizer name should be one of [%s], you provided %s." %
            (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
      opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
    elif (isinstance(optimizer, type) and
          issubclass(optimizer, optimizer_.Optimizer)):
      if lr is None:
        raise ValueError("Learning rate is None, but should be specified if "
                         "optimizer is class (%s)." % optimizer)
      opt = optimizer(learning_rate=lr)
    elif isinstance(optimizer, optimizer_.Optimizer):
      opt = optimizer
    elif callable(optimizer):
      if learning_rate is not None:
        opt = optimizer(lr)
      else:
        opt = optimizer()
      if not isinstance(opt, optimizer_.Optimizer):
        raise ValueError("Unrecognized optimizer: function should return "
                         "subclass of Optimizer. Got %s." % str(opt))
    else:
      raise ValueError("Unrecognized optimizer: should be string, "
                       "subclass of Optimizer, instance of "
                       "subclass of Optimizer or function with one argument. "
                       "Got %s." % str(optimizer))

    # All trainable variables, if specific variables are not specified.
    if variables is None:
      variables = vars_.trainable_variables()

    # Compute gradients (on the scaled loss when loss scaling is enabled, then
    # undo the scaling on the gradients).
    gradients = opt.compute_gradients(
        loss if loss_scale == 1.0 else loss_scale * loss,
        variables,
        colocate_gradients_with_ops=colocate_gradients_with_ops)
    if loss_scale != 1.0:
      gradients = _multiply_gradients_const(gradients, 1.0 / loss_scale)

    # LARS gradient re-scaling.
    if isinstance(LARS_nu, float):
      rescaled_gradients = []
      for g, v in gradients:
        if g is None:
          # compute_gradients may pair a variable with a None gradient; there
          # is nothing to rescale, so pass the pair through unchanged.
          rescaled_gradients.append((g, v))
          continue
        v_norm = linalg_ops.norm(tensor=v, ord=2)
        g_norm = linalg_ops.norm(tensor=g, ord=2)
        # Local LR is nu * |v| / |g|, or LARS_epsilon if either norm is zero.
        lars_local_lr = control_flow_ops.cond(
            pred=math_ops.logical_and(
                math_ops.not_equal(v_norm, array_ops.constant(0.0)),
                math_ops.not_equal(g_norm, array_ops.constant(0.0))),
            true_fn=lambda: LARS_nu * v_norm / g_norm,
            false_fn=lambda: LARS_epsilon)
        rescaled_gradients.append(
            (math_ops.scalar_mul(lars_local_lr, g), v))
      gradients = rescaled_gradients

    # Optionally add gradient noise.
    if gradient_noise_scale is not None:
      gradients = _add_scaled_noise_to_gradients(gradients,
                                                 gradient_noise_scale)

    # Multiply some gradients.
    if gradient_multipliers is not None:
      gradients = _multiply_gradients(gradients, gradient_multipliers)
      if not gradients:
        raise ValueError(
            "Empty list of (gradient, var) pairs encountered. This is most "
            "likely to be caused by an improper value of gradient_multipliers.")

    if "global_gradient_norm" in summaries or "gradient_norm" in summaries:
      summary.scalar("global_norm/gradient_norm",
                     clip_ops.global_norm(list(zip(*gradients))[0]))

    # Optionally clip gradients by global norm.
    if isinstance(clip_gradients, float):
      gradients = _clip_gradients_by_norm(gradients, clip_gradients)
    elif callable(clip_gradients):
      gradients = clip_gradients(gradients)
    elif clip_gradients is not None:
      raise ValueError(
          "Unknown type %s for clip_gradients" % type(clip_gradients))

    # Add scalar summary for loss.
    if "loss" in summaries:
      summary.scalar("loss", loss)

    # Add histograms for variables, gradients and gradient norms.
    for gradient, variable in gradients:
      if isinstance(gradient, ops.IndexedSlices):
        grad_values = gradient.values
      else:
        grad_values = gradient

      if grad_values is not None:
        var_name = variable.name.replace(":", "_")
        if "gradients" in summaries:
          summary.histogram("gradients/%s" % var_name, grad_values)
        if "gradient_norm" in summaries:
          summary.scalar("gradient_norm/%s" % var_name,
                         clip_ops.global_norm([grad_values]))

    if clip_gradients is not None and ("global_gradient_norm" in summaries or
                                       "gradient_norm" in summaries):
      summary.scalar("global_norm/clipped_gradient_norm",
                     clip_ops.global_norm(list(zip(*gradients))[0]))

    # Create gradient updates.
    grad_updates = opt.apply_gradients(
        gradients,
        global_step=global_step if increment_global_step else None,
        name="train")

    # Ensure the train_tensor computes grad_updates.
    train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

    return train_tensor
Example #49
0
def _eunn_param(hidden_size, capacity=2, fft=False, comp=True):
    """Create the EUNN rotation variables and precompute their cos/sin tables.

    Args:
      hidden_size: width of the hidden state; every row of the returned
        tables has `hidden_size` entries.
      capacity: number of rotation layers. Ignored when `fft` is True, in
        which case it is replaced by ceil(log2(hidden_size)).
      fft: if True, build the FFT-style layout from flat variables
        "theta_0" (and "phi_0"); otherwise build the alternating two-group
        layout from "theta_0"/"theta_1" (and "phi_0"/"phi_1").
      comp: if True, also create the phase variables and the "omega"
        variable, and return complex-valued tables; otherwise the tables
        are real-valued and the returned `diag` is None.

    Returns:
      A tuple (diag_vec, off_vec, diag, capacity):
      - diag_vec: TensorArray holding the rows of the
        [capacity, hidden_size] cosine table.
      - off_vec: TensorArray holding the rows of the
        [capacity, hidden_size] sine table.
      - diag: complex(cos(omega), sin(omega)) when `comp` is True,
        else None.
      - capacity: the number of layers actually used.
    """
    theta_phi_initializer = init_ops.random_uniform_initializer(-np.pi, np.pi)
    if fft:
        capacity = int(np.ceil(np.log2(hidden_size)))

        diag_list_0 = []
        off_list_0 = []
        # First pass: count how many angles all layers need in total so a
        # single flat variable can hold them.
        varsize = 0
        for i in range(capacity):
            size = capacity - i
            normal_size = (hidden_size // (2 ** size)) * (2 ** (size - 1))
            extra_size = max(0, (hidden_size % (2 ** size)) - (2 ** (size - 1)))
            varsize += normal_size + extra_size

        params_theta = vs.get_variable("theta_0", [varsize], initializer=theta_phi_initializer)
        cos_theta = math_ops.cos(params_theta)
        sin_theta = math_ops.sin(params_theta)

        if comp:
            params_phi = vs.get_variable("phi_0", [varsize], initializer=theta_phi_initializer)
            cos_phi = math_ops.cos(params_phi)
            sin_phi = math_ops.sin(params_phi)

            cos_list_0 = math_ops.complex(cos_theta, array_ops.zeros_like(cos_theta))
            cos_list_1 = math_ops.complex(math_ops.multiply(cos_theta, cos_phi), math_ops.multiply(cos_theta, sin_phi))
            sin_list_0 = math_ops.complex(sin_theta, array_ops.zeros_like(sin_theta))
            sin_list_1 = math_ops.complex(-math_ops.multiply(sin_theta, cos_phi), -math_ops.multiply(sin_theta, sin_phi))

        # Second pass: carve each layer's slice out of the flat tables.
        # `last` is the running offset into the flat variable.
        last = 0
        for i in range(capacity):
            size = capacity - i
            normal_size = (hidden_size // (2 ** size)) * (2 ** (size - 1))
            extra_size = max(0, (hidden_size % (2 ** size)) - (2 ** (size - 1)))

            if comp:
                cos_list_normal = array_ops.concat([array_ops.slice(cos_list_0, [last], [normal_size]), array_ops.slice(cos_list_1, [last], [normal_size])], 0)
                sin_list_normal = array_ops.concat([array_ops.slice(sin_list_0, [last], [normal_size]), -array_ops.slice(sin_list_1, [last], [normal_size])], 0)
                last += normal_size

                # Slots not covered by any angle get cos = 1, sin = 0.
                cos_list_extra = array_ops.concat([array_ops.slice(cos_list_0, [last], [extra_size]), math_ops.complex(tf.ones([hidden_size - 2*normal_size - 2*extra_size]), tf.zeros([hidden_size - 2*normal_size - 2*extra_size])), array_ops.slice(cos_list_1, [last], [extra_size])], 0)
                sin_list_extra = array_ops.concat([array_ops.slice(sin_list_0, [last], [extra_size]), math_ops.complex(tf.zeros([hidden_size - 2*normal_size - 2*extra_size]), tf.zeros([hidden_size - 2*normal_size - 2*extra_size])), -array_ops.slice(sin_list_1, [last], [extra_size])], 0)
                last += extra_size

            else:
                cos_list_normal = array_ops.slice(cos_theta, [last], [normal_size])
                cos_list_normal = array_ops.concat([cos_list_normal, cos_list_normal], 0)
                cos_list_extra = array_ops.slice(cos_theta, [last+normal_size], [extra_size])
                cos_list_extra = array_ops.concat([cos_list_extra, tf.ones([hidden_size - 2*normal_size - 2*extra_size]), cos_list_extra], 0)

                sin_list_normal = array_ops.slice(sin_theta, [last], [normal_size])
                sin_list_normal = array_ops.concat([sin_list_normal, -sin_list_normal], 0)
                sin_list_extra = array_ops.slice(sin_theta, [last+normal_size], [extra_size])
                sin_list_extra = array_ops.concat([sin_list_extra, tf.zeros([hidden_size - 2*normal_size - 2*extra_size]), -sin_list_extra], 0)

                last += normal_size + extra_size

            if normal_size != 0:
                # Reorder the "normal" entries via reshape -> transpose -> flatten.
                cos_list_normal = array_ops.reshape(array_ops.transpose(array_ops.reshape(cos_list_normal, [-1, 2*normal_size//(2**size)])), [-1])
                sin_list_normal = array_ops.reshape(array_ops.transpose(array_ops.reshape(sin_list_normal, [-1, 2*normal_size//(2**size)])), [-1])

            cos_list = array_ops.concat([cos_list_normal, cos_list_extra], 0)
            sin_list = array_ops.concat([sin_list_normal, sin_list_extra], 0)
            diag_list_0.append(cos_list)
            off_list_0.append(sin_list)

        diag_vec = array_ops.stack(diag_list_0, 0)
        off_vec = array_ops.stack(off_list_0, 0)

    else:
        # Layers alternate between two groups: group 0 owns capacity_a layers
        # with hidden_size//2 angles each, group 1 owns capacity_b layers with
        # (hidden_size-1)//2 angles each and a leading unpaired slot.
        capacity_b = capacity//2
        capacity_a = capacity - capacity_b

        hidden_size_a = hidden_size//2
        hidden_size_b = (hidden_size-1)//2

        params_theta_0 = vs.get_variable("theta_0", [capacity_a, hidden_size_a], initializer=theta_phi_initializer)
        cos_theta_0 = array_ops.reshape(math_ops.cos(params_theta_0), [capacity_a, -1, 1])
        sin_theta_0 = array_ops.reshape(math_ops.sin(params_theta_0), [capacity_a, -1, 1])

        params_theta_1 = vs.get_variable("theta_1", [capacity_b, hidden_size_b], initializer=theta_phi_initializer)
        cos_theta_1 = array_ops.reshape(math_ops.cos(params_theta_1), [capacity_b, -1, 1])
        sin_theta_1 = array_ops.reshape(math_ops.sin(params_theta_1), [capacity_b, -1, 1])

        if comp:
            params_phi_0 = vs.get_variable("phi_0", [capacity_a, hidden_size_a], initializer=theta_phi_initializer)
            cos_phi_0 = array_ops.reshape(math_ops.cos(params_phi_0), [capacity_a, -1, 1])
            sin_phi_0 = array_ops.reshape(math_ops.sin(params_phi_0), [capacity_a, -1, 1])

            cos_list_0_re = array_ops.reshape(array_ops.concat([cos_theta_0, math_ops.multiply(cos_theta_0, cos_phi_0)], 2), [capacity_a, -1])
            cos_list_0_im = array_ops.reshape(array_ops.concat([array_ops.zeros_like(cos_theta_0), math_ops.multiply(cos_theta_0, sin_phi_0)], 2), [capacity_a, -1])
            if hidden_size_a*2 != hidden_size:
                # Odd hidden_size: the last slot has no partner (cos = 1).
                cos_list_0_re = array_ops.concat([cos_list_0_re, tf.ones([capacity_a, 1])], 1)
                cos_list_0_im = array_ops.concat([cos_list_0_im, tf.zeros([capacity_a, 1])], 1)
            cos_list_0 = math_ops.complex(cos_list_0_re, cos_list_0_im)

            sin_list_0_re = array_ops.reshape(array_ops.concat([sin_theta_0, - math_ops.multiply(sin_theta_0, cos_phi_0)], 2), [capacity_a, -1])
            sin_list_0_im = array_ops.reshape(array_ops.concat([array_ops.zeros_like(sin_theta_0), - math_ops.multiply(sin_theta_0, sin_phi_0)], 2), [capacity_a, -1])
            if hidden_size_a*2 != hidden_size:
                sin_list_0_re = array_ops.concat([sin_list_0_re, tf.zeros([capacity_a, 1])], 1)
                sin_list_0_im = array_ops.concat([sin_list_0_im, tf.zeros([capacity_a, 1])], 1)
            sin_list_0 = math_ops.complex(sin_list_0_re, sin_list_0_im)

            params_phi_1 = vs.get_variable("phi_1", [capacity_b, hidden_size_b], initializer=theta_phi_initializer)
            cos_phi_1 = array_ops.reshape(math_ops.cos(params_phi_1), [capacity_b, -1, 1])
            sin_phi_1 = array_ops.reshape(math_ops.sin(params_phi_1), [capacity_b, -1, 1])

            cos_list_1_re = array_ops.reshape(array_ops.concat([cos_theta_1, math_ops.multiply(cos_theta_1, cos_phi_1)], 2), [capacity_b, -1])
            cos_list_1_re = array_ops.concat([tf.ones((capacity_b, 1)), cos_list_1_re], 1)
            cos_list_1_im = array_ops.reshape(array_ops.concat([array_ops.zeros_like(cos_theta_1), math_ops.multiply(cos_theta_1, sin_phi_1)], 2), [capacity_b, -1])
            cos_list_1_im = array_ops.concat([tf.zeros((capacity_b, 1)), cos_list_1_im], 1)
            if hidden_size_b*2 != hidden_size-1:
                cos_list_1_re = array_ops.concat([cos_list_1_re, tf.ones([capacity_b, 1])], 1)
                cos_list_1_im = array_ops.concat([cos_list_1_im, tf.zeros([capacity_b, 1])], 1)
            cos_list_1 = math_ops.complex(cos_list_1_re, cos_list_1_im)

            sin_list_1_re = array_ops.reshape(array_ops.concat([sin_theta_1, -math_ops.multiply(sin_theta_1, cos_phi_1)], 2), [capacity_b, -1])
            sin_list_1_re = array_ops.concat([tf.zeros((capacity_b, 1)), sin_list_1_re], 1)
            sin_list_1_im = array_ops.reshape(array_ops.concat([array_ops.zeros_like(sin_theta_1), -math_ops.multiply(sin_theta_1, sin_phi_1)], 2), [capacity_b, -1])
            sin_list_1_im = array_ops.concat([tf.zeros((capacity_b, 1)), sin_list_1_im], 1)
            if hidden_size_b*2 != hidden_size-1:
                sin_list_1_re = array_ops.concat([sin_list_1_re, tf.zeros([capacity_b, 1])], 1)
                sin_list_1_im = array_ops.concat([sin_list_1_im, tf.zeros([capacity_b, 1])], 1)
            sin_list_1 = math_ops.complex(sin_list_1_re, sin_list_1_im)
        else:
            cos_list_0 = array_ops.reshape(array_ops.concat([cos_theta_0, cos_theta_0], 2), [capacity_a, -1])
            sin_list_0 = array_ops.reshape(array_ops.concat([sin_theta_0, -sin_theta_0], 2), [capacity_a, -1])
            if hidden_size_a*2 != hidden_size:
                cos_list_0 = array_ops.concat([cos_list_0, tf.ones([capacity_a, 1])], 1)
                sin_list_0 = array_ops.concat([sin_list_0, tf.zeros([capacity_a, 1])], 1)

            cos_list_1 = array_ops.reshape(array_ops.concat([cos_theta_1, cos_theta_1], 2), [capacity_b, -1])
            cos_list_1 = array_ops.concat([tf.ones((capacity_b, 1)), cos_list_1], 1)
            sin_list_1 = array_ops.reshape(array_ops.concat([sin_theta_1, -sin_theta_1], 2), [capacity_b, -1])
            sin_list_1 = array_ops.concat([tf.zeros((capacity_b, 1)), sin_list_1], 1)
            if hidden_size_b*2 != hidden_size-1:
                # Pad the trailing unpaired slot with cos = 1 / sin = 0, the
                # same values the complex branch and the group-0 tables use
                # for an unpaired slot (a zero here would null that entry).
                cos_list_1 = array_ops.concat([cos_list_1, tf.ones([capacity_b, 1])], 1)
                sin_list_1 = array_ops.concat([sin_list_1, tf.zeros([capacity_b, 1])], 1)

        if capacity_b != capacity_a:
            # Odd capacity: give group 1 a dummy row so both groups have
            # capacity_a rows; the dummy row is sliced off again below.
            if comp:
                cos_list_1 = array_ops.concat([cos_list_1, math_ops.complex(tf.zeros([1, hidden_size]), tf.zeros([1, hidden_size]))], 0)
                sin_list_1 = array_ops.concat([sin_list_1, math_ops.complex(tf.zeros([1, hidden_size]), tf.zeros([1, hidden_size]))], 0)
            else:
                cos_list_1 = array_ops.concat([cos_list_1, tf.zeros([1, hidden_size])], 0)
                sin_list_1 = array_ops.concat([sin_list_1, tf.zeros([1, hidden_size])], 0)

        # Concatenating along axis 1 and reshaping interleaves the rows of the
        # two groups: group0[0], group1[0], group0[1], group1[1], ...
        diag_vec = tf.reshape(tf.concat([cos_list_0, cos_list_1], 1), [capacity_a*2, hidden_size])
        off_vec = tf.reshape(tf.concat([sin_list_0, sin_list_1], 1), [capacity_a*2, hidden_size])

        if capacity_b != capacity_a:
            diag_vec = tf.slice(diag_vec, [0, 0], [capacity, hidden_size])
            off_vec = tf.slice(off_vec, [0, 0], [capacity, hidden_size])

    def _toTensorArray(elems):
        """Unstack `elems` along its first axis into a fixed-size TensorArray."""
        elems = ops.convert_to_tensor(elems)
        n = array_ops.shape(elems)[0]
        elems_ta = tensor_array_ops.TensorArray(dtype=elems.dtype, size=n, dynamic_size=False, infer_shape=True, clear_after_read=False)
        elems_ta = elems_ta.unstack(elems)
        return elems_ta

    diag_vec = _toTensorArray(diag_vec)
    off_vec = _toTensorArray(off_vec)
    if comp:
        omega = vs.get_variable("omega", [hidden_size], initializer=theta_phi_initializer)
        diag = math_ops.complex(math_ops.cos(omega), math_ops.sin(omega))
    else:
        diag = None

    return diag_vec, off_vec, diag, capacity
Example #50
0
def internally_var_scoped_function(scope_name):
    """Fetch the zero-initialized, shape-[1] variable "dummy" under `scope_name`.

    The variable scope is opened inside this function, so the caller only
    supplies the scope name.
    """
    zeros = init_ops.zeros_initializer()
    with variable_scope.variable_scope(scope_name):
        dummy = variable_scope.get_variable(
            "dummy", shape=[1], initializer=zeros)
    return dummy
Example #51
0
    def __call__(self, inputs, state, scope=None):
        """Advance the LSTM by one time step.

        Args:
          inputs: 2-D input Tensor; its second dimension must be statically
            known.
          state: when `state_is_tuple` is True, a `(c, m)` pair of Tensors;
            otherwise a single 2-D Tensor that is sliced into two
            `num_units`-wide halves (`c` first, then `m`).
          scope: VariableScope for the created subgraph; defaults to
            "lstm_cell".

        Returns:
          A pair `(out, new_state)`. `out` is `m` passed through the
          "projection" layer (optionally clipped) when `num_proj` is set, and
          `m` itself otherwise. `new_state` is `LSTMStateTuple(c, m)` when
          `state_is_tuple` is True, else `c` and `m` concatenated on axis 1.

        Raises:
          ValueError: If input size cannot be inferred from inputs via
            static shape inference.
        """
        units = self._num_units
        if self._state_is_tuple:
            c_prev, m_prev = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, units])
            m_prev = array_ops.slice(state, [0, units], [-1, units])

        dtype = inputs.dtype
        if inputs.get_shape().with_rank(2)[1].value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        with _checked_scope(self,
                            scope or "lstm_cell",
                            initializer=self._initializer,
                            reuse=self._reuse) as unit_scope:
            if self._num_unit_shards is not None:
                unit_scope.set_partitioner(
                    partitioned_variables.fixed_size_partitioner(
                        self._num_unit_shards))

            # Gate pre-activations: a biased linear map of the inputs plus the
            # previous output squeezed through projection -> antiprojection.
            # NOTE(review): the projection here is built unconditionally with
            # self._num_proj as its output size, so this looks like it
            # assumes num_proj is set -- confirm.
            gate_width = 4 * units
            from_inputs = _linear([inputs], gate_width, bias=True)
            with tf.variable_scope('projection'):
                m_prev_low_rank = _linear([m_prev], self._num_proj, bias=False)
            with tf.variable_scope('antiprojection'):
                from_recurrent = _linear([m_prev_low_rank],
                                         gate_width,
                                         bias=False)
            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(value=from_inputs + from_recurrent,
                                         num_or_size_splits=4,
                                         axis=1)

            if self._use_peepholes:
                # Peephole (diagonal) weights are never partitioned.
                with vs.variable_scope(unit_scope) as peephole_scope:
                    if self._num_unit_shards is not None:
                        peephole_scope.set_partitioner(None)
                    w_f_diag, w_i_diag, w_o_diag = [
                        vs.get_variable(diag_name, shape=[units], dtype=dtype)
                        for diag_name in ("w_f_diag", "w_i_diag", "w_o_diag")
                    ]
                retained = sigmoid(
                    f + self._forget_bias + w_f_diag * c_prev) * c_prev
                written = (sigmoid(i + w_i_diag * c_prev) *
                           self._activation(j))
            else:
                retained = sigmoid(f + self._forget_bias) * c_prev
                written = sigmoid(i) * self._activation(j)
            c = retained + written

            if self._cell_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                c = clip_ops.clip_by_value(c, -self._cell_clip,
                                           self._cell_clip)
                # pylint: enable=invalid-unary-operand-type

            if self._use_peepholes:
                out_gate = sigmoid(o + w_o_diag * c)
            else:
                out_gate = sigmoid(o)
            m = out_gate * self._activation(c)

            if self._num_proj is None:
                out = m
            else:
                # reuse=True: share the weights created under "projection"
                # above instead of creating a second set.
                with vs.variable_scope("projection", reuse=True) as proj_scope:
                    if self._num_proj_shards is not None:
                        proj_scope.set_partitioner(
                            partitioned_variables.fixed_size_partitioner(
                                self._num_proj_shards))
                    out = _linear(m, self._num_proj, bias=False)

                if self._proj_clip is not None:
                    # pylint: disable=invalid-unary-operand-type
                    out = clip_ops.clip_by_value(out, -self._proj_clip,
                                                 self._proj_clip)
                    # pylint: enable=invalid-unary-operand-type

        if self._state_is_tuple:
            new_state = LSTMStateTuple(c, m)
        else:
            new_state = array_ops.concat([c, m], 1)

        return out, new_state
Example #52
0
def pointer_decoder(decoder_inputs, initial_state, attention_states, cell,
                    feed_prev=True, dtype=dtypes.float32, scope=None):
    """RNN decoder with pointer net for the sequence-to-sequence model.

    Each step runs the cell on a linear combination of the step input and an
    all-zero attention vector, then scores every attention position against
    the new cell state:
      x = linear([input, attns])
      cell_output, new_state = cell(x, prev_state)
      output = V^T * tanh(W * attention_states + U * new_state)

    Args:
      decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      initial_state: 2D Tensor [batch_size x cell.state_size].
      attention_states: 3D Tensor [batch_size x attn_length x attn_size];
        dimensions 1 and 2 must be statically known.
      cell: rnn_cell.RNNCell defining the cell function and size.
      feed_prev: if True, every step after the first ignores its own entry of
        decoder_inputs and is instead fed the sum of all decoder_inputs
        weighted by the softmax of the previous step's output, with gradients
        stopped. NOTE(review): the reshape used for this appears to require
        len(decoder_inputs) == attn_length -- confirm against callers.
      dtype: The dtype of the zero attention vector (default: tf.float32).
      scope: VariableScope for the created subgraph; default: "point_decoder".

    Returns:
      outputs: A list of the same length as decoder_inputs of 2D Tensors of
        shape [batch_size x attn_length] holding the un-normalized pointer
        scores of each step.
      states: A list of length len(decoder_inputs) + 1: initial_state
        followed by the cell state after each step.
      inps: The fed-back inputs built for steps 1.. when feed_prev is True;
        an empty list otherwise.

    Raises:
      ValueError: if decoder_inputs is empty, or if dimension 1 or 2 of
        attention_states is not statically known.
    """
    if not decoder_inputs:
        raise ValueError("Must provide at least 1 input to attention decoder.")
    # Slice [1:3] so that BOTH attn_length (dim 1) and attn_size (dim 2) are
    # validated; a [1:2] slice would only cover dim 1.
    if not attention_states.get_shape()[1:3].is_fully_defined():
        raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                         % attention_states.get_shape())

    with vs.variable_scope(scope or "point_decoder"):
        batch_size = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
        input_size = decoder_inputs[0].get_shape()[1].value
        attn_length = attention_states.get_shape()[1].value
        attn_size = attention_states.get_shape()[2].value

        # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
        hidden = array_ops.reshape(
            attention_states, [-1, attn_length, 1, attn_size])

        attention_vec_size = attn_size  # Size of query vectors for attention.
        k = vs.get_variable("AttnW", [1, 1, attn_size, attention_vec_size])
        hidden_features = nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME")
        v = vs.get_variable("AttnV", [attention_vec_size])

        states = [initial_state]

        def attention(query):
            """Point on hidden using hidden_features and query."""
            with vs.variable_scope("Attention"):
                y = core_rnn_cell_impl._linear(query, attention_vec_size, True)
                y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
                # Un-normalized scores v^T * tanh(...); no softmax is applied
                # here.
                s = math_ops.reduce_sum(
                    v * math_ops.tanh(hidden_features + y), [2, 3])
                return s

        outputs = []
        # The attention vector mixed into every step input stays all zeros.
        batch_attn_size = array_ops.stack([batch_size, attn_size])
        attns = array_ops.zeros(batch_attn_size, dtype=dtype)

        attns.set_shape([None, attn_size])
        inps = []
        for i, inp in enumerate(decoder_inputs):
            if i > 0:
                vs.get_variable_scope().reuse_variables()

            if feed_prev and i > 0:
                # Replace the step input by a soft selection over all decoder
                # inputs, weighted by the previous step's pointer scores.
                inp = tf.stack(decoder_inputs)
                inp = tf.transpose(inp, perm=[1, 0, 2])
                inp = tf.reshape(inp, [-1, attn_length, input_size])
                inp = tf.reduce_sum(inp * tf.reshape(tf.nn.softmax(output), [-1, attn_length, 1]), 1)
                inp = tf.stop_gradient(inp)
                inps.append(inp)

            # Merge input and previous attentions into one vector of the right size.
            x = core_rnn_cell_impl._linear([inp, attns], cell.output_size, True)
            # Run the RNN.
            cell_output, new_state = cell(x, states[-1])
            states.append(new_state)
            # Run the attention mechanism.
            output = attention(new_state)

            outputs.append(output)

    return outputs, states, inps
    def __call__(self, inputs, state, scope=None):
        """Run one step of the HM-GRU cell.

        Args:
          inputs: triple `(h_bottom, z_bottom, h_top_prev)` coming from the
            neighbouring layers.
          state: pair `(h_prev, z_prev)` from this layer's previous time
            step.
          scope: optional name for the variable scope; defaults to the class
            name.

        Returns:
          `(h_new, HmGruStateTuple(h_new, z_new))`.
        """
        # Values handed over by the other layers.
        h_bottom, z_bottom, h_top_prev = inputs
        # Values this layer produced at the previous time step.
        h_prev, z_prev = state

        units = self._num_units
        # The 'z gate' of a GRU is called the 'o gate' here, since z means
        # something different in HM-LSTM. One shared weight matrix yields the
        # logits for r, o and the single stochastic z; the candidate state g
        # is computed separately because it needs r first.
        gate_cols = 2 * units + 1
        curr_dim = h_prev.get_shape()[1]
        bottom_dim = h_bottom.get_shape()[1]

        with vs.variable_scope(scope or type(self).__name__):
            # Matrix U_l^l
            U_curr = vs.get_variable(
                "U_curr", [curr_dim, gate_cols], dtype=tf.float32)
            # Matrix U_{l+1}^l
            # NOTE(review): sized from h_bottom but multiplied with
            # h_top_prev below -- presumably both have the same width; confirm.
            U_top = vs.get_variable(
                "U_top", [bottom_dim, gate_cols], dtype=tf.float32)
            # Matrix W_{l-1}^l
            W_bottom = vs.get_variable(
                "W_bottom", [bottom_dim, gate_cols], dtype=tf.float32)
            # b_l
            bias = vs.get_variable("bias", [gate_cols], dtype=tf.float32)

            from_curr = tf.matmul(h_prev, U_curr)
            from_top = z_prev * tf.matmul(h_top_prev, U_top)
            from_bottom = z_bottom * tf.matmul(h_bottom, W_bottom)
            gate_logits = from_curr + from_top + from_bottom + bias

            # Column layout of gate_logits: [ r | o | z ].
            r = tf.sigmoid(tf.slice(gate_logits, [0, 0], [-1, units]))
            o = tf.sigmoid(tf.slice(gate_logits, [0, units], [-1, units]))
            z_logit = tf.slice(gate_logits, [0, 2 * units], [-1, 1])

            # This is the stochastic neuron
            z_new = binary_wrapper(
                z_logit,
                # TODO make this true if you do slope annealing
                pass_through=False,
                # TODO make this false if you do slope annealing
                stochastic_tensor=tf.constant(True),
                # TODO set this if you do slope annealing
                slope_tensor=None)

            # Candidate state (c_tilda aka g) with its own weights.
            # Matrix U_l^l (for just g)
            U_g_curr = vs.get_variable(
                "U_g_curr", [curr_dim, units], dtype=tf.float32)
            # Matrix U_{l+1}^l (for just g)
            U_g_top = vs.get_variable(
                "U_g_top", [bottom_dim, units], dtype=tf.float32)
            # Matrix W_{l-1}^l (for just g)
            W_g_bottom = vs.get_variable(
                "W_g_bottom", [bottom_dim, units], dtype=tf.float32)
            # b_l (for just g)
            bias_g = vs.get_variable("bias_g", [units], dtype=tf.float32)

            g_from_curr = tf.matmul(r * h_prev, U_g_curr)
            g_from_top = z_prev * tf.matmul(r * h_top_prev, U_g_top)
            g_from_bottom = z_bottom * tf.matmul(r * h_bottom, W_g_bottom)
            g = tf.tanh(g_from_curr + g_from_top + g_from_bottom + bias_g)

            # Select the operation from the two boundary indicators:
            #   z_prev == 0 and z_bottom == 0 -> copy
            #   z_prev == 0 and z_bottom != 0 -> update
            #   flush is weighted by z_prev itself
            prev_is_zero = tf.equal(z_prev, tf.zeros_like(z_prev))
            bottom_is_zero = tf.equal(z_bottom, tf.zeros_like(z_bottom))
            copy_mask = tf.to_float(
                tf.logical_and(prev_is_zero, bottom_is_zero))
            update_mask = tf.to_float(
                tf.logical_and(prev_is_zero, tf.cast(z_bottom, tf.bool)))
            flush_mask = z_prev

            # TODO put this behind a test flag
            # tf.assert_equal(tf.reduce_sum(copy_mask + update_mask + flush_mask),
            #                tf.reduce_sum(tf.ones_like(flush_mask))) # TODO
            h_flush = o * g
            h_update = (tf.ones_like(o) - o) * h_prev + h_flush
            h_new = copy_mask * h_prev + update_mask * h_update
            h_new = h_new + flush_mask * h_flush

            return h_new, HmGruStateTuple(h_new, z_new)
Example #54
0
  def __call__(self, input_, state, scope=None):
    """Advance the LSTM by one time step.

    Args:
      input_: input Tensor, 2D, batch x num_units.
      state: state Tensor, 2D, batch x state_size. Its first `num_units`
        columns are read as the cell state and the following columns
        (`num_proj` of them, or `num_units` when no projection is set) as
        the previous output.
      scope: VariableScope for the created subgraph; defaults to the class
        name ("LSTMCell").

    Returns:
      A pair `(m, new_state)`:
      - `m`: 2D Tensor, batch x output_dim, the output after reading
        "input_" with previous state "state"; output_dim is num_proj if
        num_proj was set, num_units otherwise.
      - `new_state`: 2D Tensor, the new cell state and `m` concatenated
        along axis 1.
    """
    units = self._num_units
    proj_units = units if self._num_proj is None else self._num_proj

    c_prev = array_ops.slice(state, [0, 0], [-1, units])
    m_prev = array_ops.slice(state, [0, units], [-1, proj_units])

    dtype = input_.dtype
    peepholes = self._use_peepholes

    with vs.variable_scope(scope or type(self).__name__):  # "LSTMCell"
      sharded_w = _get_sharded_variable(
          "W", [self.input_size + proj_units, 4 * units],
          self._initializer, dtype, self._num_unit_shards)
      b = vs.get_variable(
          "B", shape=[4 * units],
          initializer=array_ops.zeros_initializer, dtype=dtype)

      # One matmul over [input_, m_prev] yields all four gate
      # pre-activations: i = input_gate, j = new_input, f = forget_gate,
      # o = output_gate.
      cell_inputs = array_ops.concat(1, [input_, m_prev])
      gates = nn_ops.bias_add(
          _matmul_with_sharded_variable(cell_inputs, sharded_w), b)
      i, j, f, o = array_ops.split(1, 4, gates)

      # The forget gate carries a hard-coded bias of 1.
      if peepholes:
        # Diagonal (peephole) connections.
        w_f_diag, w_i_diag, w_o_diag = [
            vs.get_variable(
                diag_name, shape=[units],
                initializer=self._initializer,
                dtype=dtype)
            for diag_name in ("W_F_diag", "W_I_diag", "W_O_diag")
        ]
        retained = sigmoid(f + 1 + w_f_diag * c_prev) * c_prev
        written = sigmoid(i + w_i_diag * c_prev) * tanh(j)
      else:
        retained = sigmoid(f + 1) * c_prev
        written = sigmoid(i) * tanh(j)
      c = retained + written

      if self._cell_clip is not None:
        c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)

      if peepholes:
        out_gate = sigmoid(o + w_o_diag * c)
      else:
        out_gate = sigmoid(o)
      m = out_gate * tanh(c)

      if self._num_proj is not None:
        sharded_w_proj = _get_sharded_variable(
            "W_P", [units, self._num_proj], self._initializer,
            dtype, self._num_proj_shards)
        m = _matmul_with_sharded_variable(m, sharded_w_proj)

    new_state = array_ops.concat(1, [c, m])
    return m, new_state
Example #55
0
def decode_spectrum(encoded_spectrum, intensity_inputs, decoder_inputs_emb,
                    keep_conv, keep_dense, scope):
    """Build the per-step decoding graph combining an LSTM and an intensity model.

    The function builds a (possibly multi-layer) BasicLSTMCell wrapped in
    dropout, creates the intensity-model parameters under `scope` (either
    the "shared-weight" dense variant or the "joint-weight" conv variant,
    selected by `data_utils.FLAGS.shared`), primes the LSTM with
    `encoded_spectrum` starting from a zero state, and then emits one logit
    tensor per element of `decoder_inputs_emb`.

    At every step the LSTM is restarted from the spectrum-primed state and
    fed two embeddings: the previous step's input (`AA_1`) followed by the
    current one (`AA_2`).

    Args:
      encoded_spectrum: tensor fed to the LSTM cell as its first input; its
        leading dimension is used as the batch size.
      intensity_inputs: sequence indexed by decoding step; each element is
        the candidate intensity tensor for that step (layout per the inline
        shape comments -- not verified here).
      decoder_inputs_emb: sequence of embedded decoder inputs; each element
        is multiplied by a `[embedding_size, num_units]` projection.
      keep_conv: keep probability passed to `tf.nn.dropout` after the conv
        stack (joint-weight variant only).
      keep_dense: keep probability used for the cell's DropoutWrapper and
        for the dense-layer dropouts.
      scope: variable scope under which all parameters are created.

    Returns:
      A tuple `(outputs, dense1_W_penalty)` where `outputs` is the list of
      per-step logits.

    NOTE(review): `dense1_W_penalty` is only assigned in the joint-weight
    branch, so the final `return` will fail with an unbound local when
    `data_utils.FLAGS.shared` is true. Likewise `intensity_output_projected`
    is only assigned in the joint-weight branch but is read in the
    intensity-only output branch. Confirm whether the shared-weight path is
    still expected to work.
    """
    #~ print("decode_spectrum()")

    single_cell = rnn_cell.BasicLSTMCell(num_units=data_utils.num_units,
                                         state_is_tuple=True)
    #~ single_cell = rnn_cell.BasicRNNCell(num_units=data_utils.num_units)
    #~ single_cell = rnn_cell.GRUCell(num_units=data_utils.num_units)
    if (data_utils.num_layers > 1):
        cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] *
                                           data_utils.num_layers)
    else:
        cell = single_cell
    # Dropout is applied on both the inputs and the outputs of the cell.
    cell = rnn_cell.DropoutWrapper(cell,
                                   input_keep_prob=keep_dense,
                                   output_keep_prob=keep_dense)

    with variable_scope.variable_scope(scope):

        # INTENSITY-Model Parameters
        # intensity input [128,27,2,10]
        #
        if (data_utils.FLAGS.shared):  # shared-weight
            dense1_input_size = data_utils.num_ion * data_utils.WINDOW_SIZE
            dense1_output_size = 1024
            #
            dense1_W = variable_scope.get_variable(
                name="dense1_W_0",
                shape=[dense1_input_size, dense1_output_size],
                initializer=tf.uniform_unit_scaling_initializer(1.43))
            dense1_B = variable_scope.get_variable(
                name="dense1_B_0",
                shape=[dense1_output_size],
                initializer=tf.constant_initializer(0.1))
            #
            dense_linear_W = variable_scope.get_variable(
                name="dense_linear_W", shape=[dense1_output_size, 1])
            #
            dense_linear_B = variable_scope.get_variable(
                name="dense_linear_B",
                shape=[1],
                initializer=tf.constant_initializer(0.1))
        #
        else:  # joint-weight

            # conv1: [128,8,20,26] >> [128,8,20,64] with kernel [1,3,26,64]
            conv1_weights = tf.get_variable(
                name="conv1_weights",
                shape=[1, 3, data_utils.vocab_size, 64],
                initializer=tf.uniform_unit_scaling_initializer(1.43))
            conv1_biases = tf.get_variable(
                name="conv1_biases",
                shape=[64],
                initializer=tf.constant_initializer(0.1))

            # conv2: [128,8,20,64] >> [128,8,20,64] with kernel [1,2,64,64]
            conv2_weights = tf.get_variable(
                name="conv2_weights",
                shape=[1, 2, 64, 64],
                initializer=tf.uniform_unit_scaling_initializer(1.43))
            conv2_biases = tf.get_variable(
                name="conv2_biases",
                shape=[64],
                initializer=tf.constant_initializer(0.1))

            # max_pool: [128,8,20,64] >> [128,8,10,64]

            # dense1: # 4D >> [128,512]
            dense1_input_size = data_utils.num_ion * (
                data_utils.WINDOW_SIZE // 2) * 64  # data_utils.vocab_size
            dense1_output_size = 512
            dense1_weights = tf.get_variable(
                "dense1_weights",
                shape=[dense1_input_size, dense1_output_size],
                initializer=tf.uniform_unit_scaling_initializer(1.43))
            dense1_biases = tf.get_variable(
                "dense1_biases",
                shape=[dense1_output_size],
                initializer=tf.constant_initializer(0.1))
            #
            # for testing
            # NOTE(review): this is the only place dense1_W_penalty is bound,
            # yet it is returned unconditionally at the end of the function.
            dense1_W_penalty = tf.mul(tf.nn.l2_loss(dense1_weights),
                                      data_utils.l2_loss_weight,
                                      name='dense1_W_penalty')

            # dense2: # [128,512] >> [128,512]
            #~ dense2_input_size = 512
            #~ dense2_output_size = 512
            #~ dense2_weights = tf.get_variable("dense2_weights",
            #~ shape=[dense2_input_size, dense2_output_size],
            #~ initializer=tf.uniform_unit_scaling_initializer(1.43))
            #~ dense2_biases = tf.get_variable("dense2_biases", shape=[dense2_output_size], initializer=tf.constant_initializer(0.1))

            # logit_linear: [128,512] >> [128,27]
            #~ linear_input_size = 512
            #~ linear_output_size = data_utils.vocab_size
            #~ linear_weights = tf.get_variable("linear_weights",
            #~ shape=[linear_input_size, linear_output_size])
            #~ linear_biases = tf.get_variable("linear_biases", shape=[linear_output_size], initializer=tf.constant_initializer(0.0))

        # LSTM-Intensity Connection-Model Parameters
        #
        #~ denseL_W = variable_scope.get_variable(name="denseL_W",shape=[data_utils.vocab_size,data_utils.vocab_size],
        #~ initializer=tf.uniform_unit_scaling_initializer(1.43))
        #~ denseI_W = variable_scope.get_variable(name="denseI_W",shape=[data_utils.vocab_size,data_utils.vocab_size],
        #~ initializer=tf.uniform_unit_scaling_initializer(1.43))
        #~ denseC_B = variable_scope.get_variable(name="denseC_B",shape=[data_utils.vocab_size],
        #~ initializer=tf.constant_initializer(0.1))
        # cat
        # Parameters of the dense layer applied to the concatenation of the
        # intensity features and the LSTM output (hard-coded 512 + 512 inputs).
        dense_concat_W = variable_scope.get_variable(
            name="dense_concat_W",
            shape=[512 + 512, 512],
            initializer=tf.uniform_unit_scaling_initializer(1.43))
        dense_concat_B = variable_scope.get_variable(
            name="dense_concat_B",
            shape=[512],
            initializer=tf.constant_initializer(0.1))

        # DECODING - SPECTRUM as Input 0
        with variable_scope.variable_scope("LSTM_cell"):
            #
            input0 = encoded_spectrum
            #
            batch_size = array_ops.shape(input0)[0]
            zero_state = cell.zero_state(batch_size=batch_size,
                                         dtype=tf.float32)
            #
            #~ _, lstm_state = cell(inputs=input0,state=zero_state)
            # nobi
            # Prime the cell with the encoded spectrum; only the resulting
            # state is kept and reused as the start state of every step below.
            _, lstm_state_0 = cell(inputs=input0, state=zero_state)

        # nobi
        # DECODING - lstm_input_projected
        with variable_scope.variable_scope("LSTM_input_projected"):
            lstm_input_projected_W = variable_scope.get_variable(
                name="lstm_input_projected_W",
                shape=[data_utils.embedding_size, data_utils.num_units])
            #
            lstm_input_projected_B = variable_scope.get_variable(
                name="lstm_input_projected_B",
                shape=[data_utils.num_units],
                initializer=tf.constant_initializer(0.1))

        # DECODING LOOP
        # nobi
        outputs = []
        AA_1 = decoder_inputs_emb[0]  # padding [AA_1, AA_2, ?] with GO/EOS
        for i, AA_2 in enumerate(decoder_inputs_emb):

            # nobi
            # From the second step on, reuse the variables that the first
            # iteration created (e.g. inside the rnn_cell._linear calls).
            if (i > 0
                ):  # to-do-later: bring variable definitions out of the loop
                variable_scope.get_variable_scope().reuse_variables()

            # INTENSITY-Model
            candidate_intensity = intensity_inputs[i]  # [128,27,2,10]
            #
            if (data_utils.FLAGS.shared):  # shared-weight
                candidate_intensity_reshape = tf.reshape(
                    candidate_intensity,
                    shape=[-1, dense1_input_size])  # [128*27,2*10]
                #
                layer_dense1_input = candidate_intensity_reshape
                #
                layer_dense1 = tf.nn.relu(
                    tf.matmul(layer_dense1_input, dense1_W) +
                    dense1_B)  # [128*27,1024]
                #
                layer_dense1_drop = tf.nn.dropout(layer_dense1, keep_dense)
                #
                layer_dense1_output = tf.matmul(
                    layer_dense1_drop,
                    dense_linear_W) + dense_linear_B  # [128*27,1]
                #
                # Intensity output
                intensity_output = tf.reshape(layer_dense1_output,
                                              shape=[
                                                  -1, data_utils.vocab_size
                                              ])  # [128,27]
            #
            else:  # joint-weight

                # image_batch: [128,26,8,20] >> [128,8,20,26]
                # This is a bug, should be fixed at the input processing later.
                image_batch = tf.transpose(candidate_intensity,
                                           perm=[0, 2, 3, 1])  # [128,8,20,26]

                # conv1: [128,8,20,26] >> [128,8,20,64] with kernel [1,3,26,64]
                conv1 = tf.nn.relu(
                    tf.nn.conv2d(image_batch,
                                 conv1_weights,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME') + conv1_biases)

                # conv2: [128,8,20,64] >> [128,8,20,64] with kernel [1,2,64,64]
                conv2 = tf.nn.relu(
                    tf.nn.conv2d(conv1,
                                 conv2_weights,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME') + conv2_biases)
                conv2 = tf.nn.max_pool(conv2,
                                       ksize=[1, 1, 3, 1],
                                       strides=[1, 1, 2, 1],
                                       padding='SAME')  # [128,8,10,64]
                conv2 = tf.nn.dropout(conv2, keep_conv)

                # dense1: 4D >> [128,512]
                dense1_input = tf.reshape(
                    conv2, [-1, dense1_input_size])  # 2D flatten
                dense1 = tf.nn.relu(
                    tf.matmul(dense1_input, dense1_weights) +
                    dense1_biases)  # [128,512]

                # dense2: # [128,512] >> [128,512]
                #~ dense2 = tf.nn.relu(tf.matmul(dense1, dense2_weights) + dense2_biases) # [128,512]

                #~ dropout1 = tf.nn.dropout(dense2, keep_dense, name="dropout1")
                dropout1 = tf.nn.dropout(dense1, keep_dense, name="dropout1")

                # logit_linear: [128,512] >> [128,27]
                #~ intensity_output = tf.add(tf.matmul(dropout1, linear_weights), linear_biases) # [128,27]
                intensity_output = dropout1
                # NOTE(review): intensity_output_projected is bound only in
                # this joint-weight branch but is read by the intensity-only
                # output branch further down.
                intensity_output_projected = rnn_cell._linear(
                    intensity_output,
                    data_utils.vocab_size,  # [128,27]
                    bias=True,
                    bias_start=0.1,
                    scope="intensity_output_projected")

            # nobi
            # LSTM-Model
            # Project both the previous and the current embedding to the
            # cell's input size with the same weights.
            AA_1_projected = tf.matmul(
                AA_1, lstm_input_projected_W) + lstm_input_projected_B
            AA_2_projected = tf.matmul(
                AA_2, lstm_input_projected_W) + lstm_input_projected_B
            #
            with variable_scope.variable_scope("LSTM_cell"):
                #
                # The cell variables were created when priming with the
                # spectrum, so they are always reused here.
                variable_scope.get_variable_scope().reuse_variables()
                #
                # Two cell steps from the spectrum-primed state: AA_1, then AA_2.
                _, lstm_state_1 = cell(inputs=AA_1_projected,
                                       state=lstm_state_0)
                lstm_output, _ = cell(inputs=AA_2_projected,
                                      state=lstm_state_1)
                #
                # The current input becomes the "previous" one for the next step.
                AA_1 = AA_2
            #
            lstm_output_projected = rnn_cell._linear(
                lstm_output,
                data_utils.vocab_size,  # [128,27]
                bias=True,
                bias_start=0.1,
                scope="lstm_output_projected")

            # LSTM-Intensity Connection-Model >> OUTPUT
            #
            if (data_utils.FLAGS.use_intensity and data_utils.FLAGS.use_lstm):
                #
                #~ output_logit = tf.nn.relu(tf.matmul(lstm_output_projected,denseL_W) +
                #~ tf.matmul(intensity_output_projected,denseI_W) +
                #~ denseC_B)
                #
                # cat
                concat = tf.concat(concat_dim=1,
                                   values=[intensity_output, lstm_output])
                concat_dense = tf.nn.relu(
                    tf.matmul(concat, dense_concat_W) + dense_concat_B)
                concat_drop = tf.nn.dropout(concat_dense, keep_dense)
                #
                output_logit = rnn_cell._linear(
                    concat_drop,
                    data_utils.vocab_size,  # [128,27]
                    bias=True,
                    bias_start=0.1,
                    scope="concat_output_projected")
            #
            elif (data_utils.FLAGS.use_intensity):
                # intensity only (without LSTM >> up to 10% loss, especially at AA-accuracy?)
                output_logit = intensity_output_projected
            #
            elif (data_utils.FLAGS.use_lstm):
                output_logit = lstm_output_projected
            #
            else:
                # Neither model is enabled: report and terminate the process.
                print("ERROR: wrong LSTM-Intensity model specified!")
                sys.exit()
            #
            outputs.append(output_logit)

    return (outputs, dense1_W_penalty)
    def __call__(self, inputs, state, scope=None):
        """Run one step of the hierarchical multiscale LSTM cell.

        Args:
          inputs: a 3-tuple `(h_bottom, z_bottom, h_top_prev)`: the hidden
            state and boundary indicator coming from the layer below, and the
            previous hidden state of the layer above.
          state: a 3-tuple `(c_prev, h_prev, z_prev)`: this layer's cell
            state, hidden state and boundary indicator from the previous
            time step.
          scope: optional name for the variable scope; defaults to the class
            name.

        Returns:
          A pair `(h_new, state_new)` where `state_new` is an
          `HmLstmStateTuple(c_new, h_new, z_new)`.
        """
        # vars from different layers.
        h_bottom, z_bottom, h_top_prev = inputs
        # vars from the previous time step on the same layer
        c_prev, h_prev, z_prev = state

        # Need enough rows in the shared matrix for f, i, o, g, z_stochastic_tilda
        num_rows = 4 * self._num_units + 1

        # scope: optional name for the variable scope, defaults to "HmLstmCell"
        with vs.variable_scope(scope or type(self).__name__):  # "HmLstmCell"
            # Matrix U_l^l
            U_curr = vs.get_variable("U_curr",
                                     [h_prev.get_shape()[1], num_rows],
                                     dtype=tf.float32)
            # Matrix U_{l+1}^l
            # This matrix multiplies h_top_prev below, so its row count must
            # be the width of the layer above. Sizing it from h_bottom (as
            # was done previously) only works when the layers above and
            # below happen to have the same width.
            U_top = vs.get_variable("U_top",
                                    [h_top_prev.get_shape()[1], num_rows],
                                    dtype=tf.float32)
            # Matrix W_{l-1}^l
            W_bottom = vs.get_variable("W_bottom",
                                       [h_bottom.get_shape()[1], num_rows],
                                       dtype=tf.float32)
            # b_l
            bias = vs.get_variable("bias", [num_rows], dtype=tf.float32)

            # Recurrent, top-down and bottom-up contributions. The top-down
            # term is gated by this layer's previous boundary indicator and
            # the bottom-up term by the boundary indicator from below.
            s_curr = tf.matmul(h_prev, U_curr)
            s_top = z_prev * tf.matmul(h_top_prev, U_top)
            s_bottom = z_bottom * tf.matmul(h_bottom, W_bottom)
            gate_logits = s_curr + s_top + s_bottom + bias

            # Split the shared pre-activation into the four gates plus the
            # single boundary logit stored in the last column.
            f_logits = tf.slice(gate_logits, [0, 0], [-1, self._num_units])
            i_logits = tf.slice(gate_logits, [0, self._num_units],
                                [-1, self._num_units])
            o_logits = tf.slice(gate_logits, [0, 2 * self._num_units],
                                [-1, self._num_units])
            g_logits = tf.slice(gate_logits, [0, 3 * self._num_units],
                                [-1, self._num_units])
            z_t_logit = tf.slice(gate_logits, [0, 4 * self._num_units],
                                 [-1, 1])

            f = tf.sigmoid(f_logits)
            i = tf.sigmoid(i_logits)
            o = tf.sigmoid(o_logits)
            g = tf.tanh(g_logits)

            # This is the stochastic neuron
            z_new = binary_wrapper(
                z_t_logit,
                pass_through=
                False,  # TODO make this true if you do slope annealing
                stochastic_tensor=tf.constant(
                    True),  # TODO make this false if you do slope annealing
                slope_tensor=None)  # TODO set this if you do slope annealing

            # Select one of three operations per example:
            #   COPY   when z_prev == 0 and z_bottom == 0
            #   UPDATE when z_prev == 0 and z_bottom != 0
            #   FLUSH  when z_prev is set (the mask is z_prev itself)
            z_zero_mask = tf.equal(z_prev, tf.zeros_like(z_prev))
            copy_mask = tf.to_float(
                tf.logical_and(z_zero_mask,
                               tf.equal(z_bottom, tf.zeros_like(z_bottom))))
            update_mask = tf.to_float(
                tf.logical_and(z_zero_mask, tf.cast(z_bottom, tf.bool)))
            flush_mask = z_prev

            # TODO put this behind a test flag
            # tf.assert_equal(tf.reduce_sum(copy_mask + update_mask + flush_mask),
            # tf.reduce_sum(tf.ones_like(flush_mask))) # TODO

            # FLUSH discards the old cell state; UPDATE is the usual LSTM
            # update; COPY carries the previous values through unchanged.
            c_flush = i * g
            c_update = f * c_prev + c_flush
            c_new = copy_mask * c_prev + update_mask * c_update + flush_mask * c_flush

            h_flush = o * tf.tanh(c_flush)
            h_update = o * tf.tanh(c_update)
            h_new = copy_mask * h_prev + update_mask * h_update + flush_mask * h_flush

            state_new = HmLstmStateTuple(c_new, h_new, z_new)
            return h_new, state_new
Example #57
0
def _get_variable(name, shape, initializer):
    """Get or create variable `name` using the module-level `dataType` dtype."""
    variable_kwargs = dict(shape=shape,
                           initializer=initializer,
                           dtype=dataType)
    return variable_scope.get_variable(name, **variable_kwargs)
def _create_attention_score_fn(name,
                               num_units,
                               attention_option,
                               reuse,
                               dtype=dtypes.float32):
    """Build a function that turns a query into an attention context vector.

    Args:
      name: variable scope name under which attention variables are created.
      num_units: hidden state dimension.
      attention_option: scoring method, either "luong" or "bahdanau".
        "bahdanau": additive (Bahdanau et al., ICLR'2015)
        "luong": multiplicative (Luong et al., EMNLP'2015)
      reuse: whether to reuse variables in the scope.
      dtype: (default: `dtypes.float32`) data type of the created variables.

    Returns:
      attention_score_fn: callable `(query, keys, values) -> context_vector`.
    """
    with variable_scope.variable_scope(name, reuse=reuse):
        # Only the additive variant has trainable parameters of its own.
        if attention_option == "bahdanau":
            query_w = variable_scope.get_variable(
                "attnW", [num_units, num_units], dtype=dtype)
            score_v = variable_scope.get_variable(
                "attnV", [num_units], dtype=dtype)

        def attention_score_fn(query, keys, values):
            """Attend over `values` using the similarity of `query` and `keys`.

            Args:
              query: A Tensor of shape [batch_size, num_units].
              keys: A Tensor of shape [batch_size, attention_length, num_units].
              values: A Tensor of shape [batch_size, attention_length, num_units].

            Returns:
              context_vector: A Tensor of shape [batch_size, num_units].

            Raises:
              ValueError: if attention_option is neither "luong" or "bahdanau".
            """
            # The query is broadcast against keys as [batch_size, 1, num_units].
            broadcast_shape = [-1, 1, num_units]

            if attention_option == "bahdanau":
                # Additive scoring: linearly transform the query first.
                transformed = math_ops.matmul(query, query_w)
                expanded = array_ops.reshape(transformed, broadcast_shape)
                scores = _attn_add_fun(score_v, keys, expanded)
            elif attention_option == "luong":
                # Multiplicative scoring uses the raw query.
                expanded = array_ops.reshape(query, broadcast_shape)
                scores = _attn_mul_fun(keys, expanded)
            else:
                raise ValueError("Unknown attention option %s!" %
                                 attention_option)

            # Normalise scores ([batch_size, length]) into alignment weights.
            # TODO(thangluong): not normalize over padding positions.
            weights = nn_ops.softmax(scores)

            # Weighted sum of the values along the attention_length axis.
            weights = array_ops.expand_dims(weights, 2)
            context_vector = math_ops.reduce_sum(weights * values, [1])
            context_vector.set_shape([None, num_units])

            return context_vector

        return attention_score_fn
Example #59
0
 def f():
   """Return variable 'v' (initialised to 1.0) multiplied by 2.0."""
   initial_value = constant_op.constant(1.0)
   v = variable_scope.get_variable('v', initializer=initial_value)
   doubled = v * constant_op.constant(2.0)
   return doubled
Example #60
0
def encode_spectrum(encoder_inputs, intensity_inputs_forward,
                    intensity_inputs_backward, decoder_inputs_forward,
                    decoder_inputs_backward, keep_conv, keep_dense):
    """Encode the spectrum with a small conv net and pass it to embed_labels.

    The first element of `encoder_inputs` is reshaped to a one-row image of
    width `data_utils.MZ_SIZE`, run through two conv+ReLU layers, max-pooled
    with stride 4, and projected by a 512-unit dense layer. Dropout with
    `keep_conv` follows the pooling and dropout with `keep_dense` follows
    the dense layer. All remaining arguments are forwarded untouched to
    `embed_labels` together with the encoded spectrum.

    Returns:
      Whatever `embed_labels` returns.
    """

    def _weight(var_name, var_shape):
        # Weight tensor with the unit-scaling initializer used by this model.
        return variable_scope.get_variable(
            name=var_name,
            shape=var_shape,
            initializer=tf.uniform_unit_scaling_initializer(1.43))

    def _bias(var_name, size):
        # Bias vector initialised to the constant 0.1.
        return variable_scope.get_variable(
            name=var_name,
            shape=[size],
            initializer=tf.constant_initializer(0.1))

    def _conv_relu(images, kernel, offset):
        # Stride-1 'SAME' convolution followed by bias add and ReLU.
        convolved = tf.nn.conv2d(images,
                                 kernel,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME')
        return tf.nn.relu(convolved + offset)

    with variable_scope.variable_scope("embedding_rnn_seq2seq"):

        # Spectrum as a [batch, 1, MZ_SIZE, 1] image.
        spectrum_image = tf.reshape(encoder_inputs[0],
                                    [-1, 1, data_utils.MZ_SIZE, 1])

        # Parameters, created in the order conv1, conv2, dense1.
        conv1_kernel = _weight("conv1_W", [1, 4, 1, 4])
        conv1_offset = _bias("conv1_B", 4)
        conv2_kernel = _weight("conv2_W", [1, 4, 4, 4])
        conv2_offset = _bias("conv2_B", 4)

        # Flattened size after the stride-4 pooling with 4 channels.
        flat_size = 1 * (data_utils.MZ_SIZE // (4)) * 4
        hidden_size = 512
        dense_kernel = _weight("dense1_W", [flat_size, hidden_size])
        dense_offset = _bias("dense1_B", hidden_size)

        # Convolutional stack.
        features = _conv_relu(spectrum_image, conv1_kernel, conv1_offset)
        features = _conv_relu(features, conv2_kernel, conv2_offset)
        features = tf.nn.max_pool(features,
                                  ksize=[1, 1, 6, 1],
                                  strides=[1, 1, 4, 1],
                                  padding='SAME')
        features = tf.nn.dropout(features, keep_conv)

        # Dense projection of the flattened feature map.
        flattened = tf.reshape(features, [-1, flat_size])
        hidden = tf.nn.relu(tf.matmul(flattened, dense_kernel) + dense_offset)
        encoded_spectrum = tf.nn.dropout(hidden, keep_dense)

        # SPECTRUM as Input 0
        return embed_labels(encoded_spectrum, intensity_inputs_forward,
                            intensity_inputs_backward, decoder_inputs_forward,
                            decoder_inputs_backward, keep_conv, keep_dense)