Beispiel #1
0
def _value_and_gradients(fn, fn_arg_list, result=None, grads=None, name=None):
  """Helper to `maybe_call_fn_and_grads`.

  Evaluates `fn` at `fn_arg_list` (unless `result` is supplied) and computes
  the gradients of the result with respect to the arguments (unless `grads` is
  supplied).

  Args:
    fn: Python callable invoked as `fn(*fn_arg_list)`.
    fn_arg_list: A single argument or a list-like of arguments for `fn`; each
      one is converted to a `Tensor`.
    result: Optional precomputed value of `fn(*fn_arg_list)`.
    grads: Optional precomputed gradients. When given, they are converted to
      `Tensor`s and returned without any differentiation taking place.
    name: Optional name scope; defaults to 'value_and_gradients'.

  Returns:
    A `(result, grads)` pair.
  """
  with tf.name_scope(name, 'value_and_gradients', [fn_arg_list, result, grads]):

    def _as_tensors(value, tensor_name):
      """Converts `value` (or each of its items) to a `Tensor`, keeping `None`."""
      def _convert_one(item):
        if item is None:
          return item
        return tf.convert_to_tensor(item, name=tensor_name)

      if is_list_like(value):
        return [_convert_one(item) for item in value]
      return _convert_one(value)

    if is_list_like(fn_arg_list):
      fn_arg_list = list(fn_arg_list)
    else:
      fn_arg_list = [fn_arg_list]
    fn_arg_list = _as_tensors(fn_arg_list, 'fn_arg')

    if result is None:
      result = fn(*fn_arg_list)
      if grads is None and tf.executing_eagerly():
        # Ensure we disable bijector cacheing in eager mode.
        # TODO(b/72831017): Remove this once bijector cacheing is fixed for
        # eager mode.
        fn_arg_list = [0 + arg for arg in fn_arg_list]

    result = _as_tensors(result, 'fn_result')

    if grads is not None:
      return result, _as_tensors(grads, 'fn_grad')

    eager = tf.executing_eagerly()
    # Compute the block diagonal of Jacobian when there is one result per
    # argument.
    # TODO(b/79158574): Guard this calculation by an arg which explicitly
    # requests block diagonal Jacobian calculation.
    block_diagonal = is_list_like(result) and len(result) == len(fn_arg_list)

    if eager and block_diagonal:
      def make_fn_slice(i):
        """Needed to prevent `cell-var-from-loop` pylint warning."""
        return lambda *args: fn(*args)[i]
      grads = [
          tfe.gradients_function(make_fn_slice(i))(*fn_arg_list)[i]
          for i in range(len(result))
      ]
    elif eager:
      grads = tfe.gradients_function(fn)(*fn_arg_list)
    elif block_diagonal:
      grads = [tf.gradients(res, arg)[0]
               for res, arg in zip(result, fn_arg_list)]
    else:
      grads = tf.gradients(result, fn_arg_list)

    return result, grads
  def test_gradients_and_propagation_of_nan_in_x(self):
    """A NaN in `x` reaches `y` but leaves the other gradients intact."""
    # A NaN in x must show up in y without disturbing the gradients of the
    # finite entries of x. The NaN entry itself gets a finite (zero) gradient.
    dtype = np.float32
    x_min, x_max = 0., 1.
    num_pts = 4

    # Reference values of the linear map y = 2 * x on a regular grid.
    y_ref = 2 * np.linspace(x_min, x_max, num_pts, dtype=dtype)

    x_np = np.array([0., 0.1, np.nan, 0.4, 1.]).astype(dtype)
    x = tf.constant(x_np)

    with self.test_session():
      y = tfp.math.interp_regular_1d_grid(x, x_min, x_max, y_ref)
      self.assertAllClose(self.evaluate(y), 2 * x_np, atol=0, rtol=1e-6)
      if not tf.executing_eagerly():
        # `tf.gradients` is only exercised in graph mode.
        dy_dx_ = tf.gradients(y, x)[0].eval()
        self.assertAllClose([2., 2., 0., 2., 2.], dy_dx_)
Beispiel #3
0
  def compute_gradients(self, f, args, grad_ys=None):
    """Evaluates the gradients of `f` with respect to `args`.

    Uses `tf.contrib.eager.gradients_function` when executing eagerly and
    `tf.gradients` otherwise.

    Arguments:
      f: Function to be differentiated.
      args: List of `Tensor` arguments to be passed to the function `f`.
        Gradients are computed with respect to these arguments.
      grad_ys: Optional. A `Tensor` with the same shape as the `Tensor` returned
        by `f` that contains the incoming gradients with respect to the result
        of `f`.

    Returns:
      grads: List containing the evaluated gradients of `f` with respect to
        `args`. It has the same length as `args`.
    """
    if not tf.executing_eagerly():
      graph_grads = tf.gradients(f(*args), args, grad_ys=grad_ys)
      return self.evaluate(graph_grads)

    grad_fn = tf.contrib.eager.gradients_function(f)
    # Only forward `dy` when incoming gradients were actually supplied.
    dy_kwargs = {} if grad_ys is None else {"dy": grad_ys}
    return self.evaluate(grad_fn(*args, **dy_kwargs))
Beispiel #4
0
  def testNormalizations(self, conv_ctor, norm_ctor, norm_kwargs):
    """Checks the final normalization yields ~zero mean and ~unit std-dev.

    Args:
      conv_ctor: Callable building the conv net under test.
      norm_ctor: Normalization constructor forwarded to `conv_ctor`.
      norm_kwargs: Keyword arguments for `norm_ctor`. An "axis" entry equal to
        `[1, 2]` is treated as instance normalization; anything else is treated
        as layer normalization.
    """
    if tf.executing_eagerly():
      self.skipTest("Cannot test normalization correctness in Eager.")
    module = conv_ctor(
        output_channels=[16, 16],
        kernel_shapes=(3,),
        strides=(1,),
        paddings=("SAME",),
        normalization_ctor=norm_ctor,
        normalization_kwargs=norm_kwargs,
        normalize_final=True,
        activate_final=False)  # No final activation, that would un-normalize.
    inputs = tf.random_uniform([16, 48, 64, 3])
    output = module(inputs)
    with tf.train.SingularMonitoredSession() as session:
      output_np = session.run(output)

    # Convert the output into something where all the dimensions that should be
    # jointly normalized are combined to be on axis=1.
    if "axis" in norm_kwargs and norm_kwargs["axis"] == [1, 2]:
      # Check for instance normalization - combine spatial dimensions. The
      # channel axis must keep the size of the *output* (16 channels here), not
      # of the 3-channel input, otherwise the statistics below would mix
      # values coming from different channels.
      num_channels = output_np.shape[-1]
      output_np = np.reshape(output_np, [16, -1, num_channels])
    else:
      # Check for layer normalization - combine all non-batch dimensions.
      output_np = np.reshape(output_np, [16, -1])
    mean = np.mean(output_np, axis=1)
    std_dev = np.std(output_np, axis=1)
    # High tolerance - summing across big images, this normalization is fairly
    # approximate.
    self.assertAllClose(mean, np.zeros_like(mean), atol=2e-2)
    self.assertAllClose(std_dev, np.ones_like(std_dev), atol=2e-2)
Beispiel #5
0
  def testDataFormat(self, module, data_format):
    """Checks the output shape (or the expected error) for `data_format`."""
    net = module(
        output_channels=self.output_channels,
        kernel_shapes=self.kernel_shapes,
        strides=self.strides,
        paddings=self.paddings,
        data_format=data_format)

    batch_size = 10
    input_height, input_width, input_channels = 100, 100, 3
    final_channel = self.output_channels[-1]
    spatial_dims = [input_height, input_width]
    if data_format == "NHWC":
      # Channels last.
      input_shape = [batch_size] + spatial_dims + [input_channels]
      expected_output_shape = [batch_size] + spatial_dims + [final_channel]
    else:
      # Channels directly after the batch dimension.
      input_shape = [batch_size, input_channels] + spatial_dims
      expected_output_shape = [batch_size, final_channel] + spatial_dims
    input_to_net = tf.random_normal(dtype=tf.float32, shape=input_shape)

    if not (tf.executing_eagerly() and data_format == "NCHW"):
      output = net(input_to_net)
      self.assertEqual(output.get_shape().as_list(), expected_output_shape)
      return

    # Eager + NCHW is expected to fail; the error type depends on the module.
    if module == snt.nets.ConvNet2D:
      expected_exception = tf.errors.UnimplementedError
    else:
      expected_exception = tf.errors.InvalidArgumentError
    with self.assertRaisesRegexp(expected_exception, "only supports NHWC"):
      net(input_to_net)
Beispiel #6
0
  def testSampleWithSameSeed(self):
    """Same-seed samples agree across the Wishart parameterizations."""
    if tf.executing_eagerly():
      return
    scale = make_pd(1., 2)
    df = 4

    # Reference sample from the `scale_tril` parameterization, full output.
    tril_dist = tfd.Wishart(
        df, scale_tril=chol(scale), input_output_cholesky=False)
    x = self.evaluate(tril_dist.sample(1, seed=42))
    chol_x = [chol(x[0])]

    full_dist = tfd.Wishart(df, scale, input_output_cholesky=False)
    self.assertAllClose(x, self.evaluate(full_dist.sample(1, seed=42)))

    def check_cholesky_output(dist):
      """Checks a Cholesky-output dist against `chol_x`; diagonal must be > 0."""
      self.assertAllClose(chol_x, self.evaluate(dist.sample(1, seed=42)))
      diagonal = tf.matrix_diag_part(dist.sample(1000, seed=42))
      np.testing.assert_array_less(0., self.evaluate(diagonal))

    check_cholesky_output(tfd.Wishart(
        df, scale_tril=chol(scale), input_output_cholesky=True))
    check_cholesky_output(tfd.Wishart(
        df, scale=scale, input_output_cholesky=True))
Beispiel #7
0
  def testEventShape(self):
    """Checks static shape inference of `Reshape` built via `build_shapes`."""
    # Shape is always known for reshaping in eager mode, so we skip these tests.
    if tf.executing_eagerly():
      return

    event_shape_in, event_shape_out = self.build_shapes([2, 3], [6])
    bijector = tfb.Reshape(
        event_shape_out=event_shape_out,
        event_shape_in=event_shape_in,
        validate_args=True)

    # Pairs of (input dims, expected output dims).
    forward_cases = (
        ([4, 2, 3], [4, None]),
        ([None, 2, 3], [None, None]),
    )
    inverse_cases = (
        ([4, 6], [4, None, None]),
        ([None, 6], [None, None, None]),
    )
    for input_dims, expected_dims in forward_cases:
      forward_shape = bijector.forward_event_shape(tf.TensorShape(input_dims))
      self.assertEqual(forward_shape.as_list(), expected_dims)
    for input_dims, expected_dims in inverse_cases:
      inverse_shape = bijector.inverse_event_shape(tf.TensorShape(input_dims))
      self.assertEqual(inverse_shape.as_list(), expected_dims)

    # If the input shape is totally unknown, there's nothing we can do!
    unknown_shape = bijector.forward_event_shape(tf.TensorShape(None))
    self.assertIsNone(unknown_shape.ndims)
Beispiel #8
0
  def testEventShape(self):
    """Checks shape inference of `Reshape` with static event shapes."""
    bijector = tfb.Reshape(
        event_shape_out=tf.TensorShape([6]),
        event_shape_in=tf.TensorShape([2, 3]),
        validate_args=True)

    def check_round_trip(batch_dim):
      """Checks forward and inverse event shapes for the given batch dim."""
      forward_shape = bijector.forward_event_shape(
          tf.TensorShape([batch_dim, 2, 3]))
      self.assertEqual(forward_shape.as_list(), [batch_dim, 6])
      inverse_shape = bijector.inverse_event_shape(
          tf.TensorShape([batch_dim, 6]))
      self.assertEqual(inverse_shape.as_list(), [batch_dim, 2, 3])

    # With statically known event_shape_in/_out, forward_ and
    # inverse_event_shape must be correct for a fully specified input.
    check_round_trip(4)

    # Shape is always known for reshaping in eager mode, so we skip these tests.
    if tf.executing_eagerly():
      return

    # ... and also when the input shape is only partially specified.
    check_round_trip(None)

    # If the input shape is totally unknown, there's nothing we can do!
    unknown_shape = bijector.forward_event_shape(tf.TensorShape(None))
    self.assertIsNone(unknown_shape.ndims)
 def test_copy_layers(self):
   """Copying a layer tree applies replacements and keeps variable values."""
   graph_model = dc.models.TensorGraph()
   features = Feature(shape=(None, 10))
   dense = Dense(
       10, in_layers=features, biases_initializer=tf.random_normal_initializer)
   constant = Constant(10.0)
   output = dense + constant
   graph_model.add_output(output)
   graph_model.set_loss(output)
   graph_model.fit_generator([])

   replacements = {constant: Constant(20.0)}
   copied = output.copy(replacements, graph_model)

   # The copy mirrors the original tree, with the constant swapped out.
   assert isinstance(copied, Add)
   copied_dense = copied.in_layers[0]
   assert isinstance(copied_dense, Dense)
   assert isinstance(copied_dense.in_layers[0], Feature)
   assert copied.in_layers[1] == replacements[constant]

   # The copied dense layer must carry the original layer's variable values.
   variables = graph_model.get_layer_variables(dense)
   with graph_model._get_tf("Graph").as_default():
     if tf.executing_eagerly():
       values = [variable.numpy() for variable in variables]
     else:
       values = graph_model.session.run(variables)
   for original, duplicate in zip(values, copied.in_layers[0].variable_values):
     assert np.array_equal(original, duplicate)
Beispiel #10
0
  def testRegularizersInRegularizationLosses(self, transpose, use_bias):
    """Checks that module regularizers land in REGULARIZATION_LOSSES."""
    module = (
        functools.partial(snt.nets.ConvNet2DTranspose,
                          output_shapes=[[100, 100]])
        if transpose else snt.nets.ConvNet2D)

    # Weights always get an L1 regularizer; biases (if any) get an L2 one.
    regularizers = {"w": tf.contrib.layers.l1_regularizer(scale=0.5)}
    if use_bias:
      regularizers["b"] = tf.contrib.layers.l2_regularizer(scale=0.5)

    model = module(output_channels=self.output_channels,
                   kernel_shapes=self.kernel_shapes,
                   strides=self.strides,
                   paddings=self.paddings,
                   use_bias=use_bias,
                   regularizers=regularizers)

    input_to_net = tf.random_normal(dtype=tf.float32, shape=(1, 100, 100, 3))
    model(input_to_net)

    regularization_losses = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
    losses_per_layer = 2 if use_bias else 1
    self.assertLen(regularization_losses, 3 * losses_per_layer)

    # The tensor names are only inspected in graph mode.
    if tf.executing_eagerly():
      return
    self.assertRegexpMatches(regularization_losses[0].name,
                             ".*l1_regularizer.*")
    if use_bias:
      self.assertRegexpMatches(regularization_losses[1].name,
                               ".*l2_regularizer.*")
def _set_seed(seed):
  """Helper which uses graph seed if using TFE.

  Args:
    seed: The seed to use.

  Returns:
    `seed` unchanged in graph mode. In eager mode the seed is installed via
    `tf.set_random_seed` and `None` is returned instead.
  """
  # TODO(b/68017812): Deprecate once TFE supports seed.
  if not tf.executing_eagerly():
    return seed
  tf.set_random_seed(seed)
  return None
Beispiel #12
0
  def testActivateBiasFlags(self, activate_final, use_bias, use_dropout):
    """Checks the final op type and variable count for each flag setting."""
    mlp = snt.nets.MLP(name=self.module_name,
                       output_sizes=self.output_sizes,
                       activate_final=activate_final,
                       use_bias=use_bias,
                       use_dropout=use_dropout)

    inputs = tf.random_normal(
        dtype=tf.float32, shape=[self.batch_size, self.input_size])
    net = mlp(inputs)

    if not tf.executing_eagerly():
      # The op producing the output reveals the last step of the network.
      if activate_final:
        expected_op_type = "Relu"
      elif use_bias:
        expected_op_type = "Add"
      else:
        expected_op_type = "MatMul"
      self.assertEqual(net.op.type, expected_op_type)

    variables = mlp.get_variables()

    # One weight per layer, plus one bias per layer when biases are enabled.
    variables_per_layer = 2 if use_bias else 1
    self.assertEqual(len(variables),
                     len(self.output_sizes) * variables_per_layer)
Beispiel #13
0
  def testInitialStateNames(self):
    """Checks the tensor names of trainable `DeepRNN` initial states."""
    if tf.executing_eagerly():
      self.skipTest("Tensor.name is meaningless in eager mode.")

    hidden_size_a = 3
    hidden_size_b = 4
    batch_size = 5
    deep_rnn = snt.DeepRNN(
        [snt.LSTM(hidden_size_a, name="a"), snt.LSTM(hidden_size_b, name="b")])

    def check_names(state, expected_names):
      """Compares `state[core][part].name` against the expected table."""
      for core_index, core_names in enumerate(expected_names):
        for part_index, expected_name in enumerate(core_names):
          self.assertEqual(state[core_index][part_index].name, expected_name)

    # Default scope name.
    deep_rnn_state = deep_rnn.initial_state(batch_size, trainable=True)
    check_names(deep_rnn_state, (
        ("deep_rnn_initial_state/a_initial_state/state_hidden_tiled:0",
         "deep_rnn_initial_state/a_initial_state/state_cell_tiled:0"),
        ("deep_rnn_initial_state/b_initial_state/state_hidden_tiled:0",
         "deep_rnn_initial_state/b_initial_state/state_cell_tiled:0"),
    ))

    # Explicit scope name.
    other_start_state = deep_rnn.initial_state(
        batch_size, trainable=True, name="blah")
    check_names(other_start_state, (
        ("blah/a_initial_state/state_hidden_tiled:0",
         "blah/a_initial_state/state_cell_tiled:0"),
        ("blah/b_initial_state/state_hidden_tiled:0",
         "blah/b_initial_state/state_cell_tiled:0"),
    ))
Beispiel #14
0
  def _check_same_graph(self):
    """Checks that the module is not being connected to multiple Graphs.

    An instance of a Sonnet module 'owns' the variables it contains, and permits
    seamless variable sharing. As such, connecting a single module instance to
    multiple Graphs is not possible - this function will raise an error should
    that occur.

    On the first call the current default graph is recorded on the module and
    `_set_module_info` is invoked.

    Raises:
      DifferentGraphError: if the module is connected to a different Graph than
        it was previously used in.
    """
    with ops.init_scope():
      # We need `init_scope` in case we're running inside a defun. In that case
      # what we want is information about where the function will be called,
      # not where the function is being built.
      will_call_in_eager_context = tf.executing_eagerly()
      current_graph = tf.get_default_graph()

    if self._graph is None:
      self._graph = current_graph
      self._set_module_info()

    # Same graph checks only make sense when calling from graph mode (in eager
    # mode there is a single process level context where all modules are
    # created).
    called_in_graph_mode = not will_call_in_eager_context
    if called_in_graph_mode and self._graph != current_graph:
      raise DifferentGraphError("Cannot connect module to multiple Graphs.")
Beispiel #15
0
  def testCustomGetter(self):
    """Gradients vanish only while the stop-gradient getter context is active."""
    custom_getter = snt.custom_getters.Context(snt.custom_getters.stop_gradient)
    module = snt.nets.ConvNet2D(output_channels=self.output_channels,
                                kernel_shapes=self.kernel_shapes,
                                rates=self.rates,
                                strides=self.strides,
                                paddings=self.paddings,
                                custom_getter=custom_getter)

    input_to_net = tf.random_normal(dtype=tf.float32,
                                    shape=[10, 100, 100, 3])

    if tf.executing_eagerly():
      # One tape per connection: first without, then with the getter context.
      with tf.GradientTape() as plain_tape:
        out0 = module(input_to_net)
      with tf.GradientTape() as stopped_tape, custom_getter:
        out1 = module(input_to_net)
      all_vars = tf.trainable_variables()
      out0_grads = plain_tape.gradient(out0, all_vars)
      out1_grads = stopped_tape.gradient(out1, all_vars)
    else:
      out0 = module(input_to_net)
      with custom_getter:
        out1 = module(input_to_net)
      all_vars = tf.trainable_variables()
      out0_grads = tf.gradients(out0, all_vars)
      out1_grads = tf.gradients(out1, all_vars)

    # Outside the context every variable receives a gradient ...
    for grad in out0_grads:
      self.assertNotEqual(None, grad)
    # ... inside it none of them does.
    self.assertEqual([None] * len(out1_grads), out1_grads)
  def testReprWorksCorrectlyMultivariate(self):
    """Checks `repr` for fully and partially known batch shapes."""
    mvn_static = tfd.MultivariateNormalDiag(
        loc=np.zeros([2, 2]), name="MVN")
    expected_static_repr = (
        "<tfp.distributions.MultivariateNormalDiag"
        " 'MVN/'"
        " batch_shape=(2,)"
        " event_shape=(2,)"
        " dtype=float64>")
    self.assertEqual(repr(mvn_static), expected_static_repr)

    # There's no notion of partially known shapes in eager mode, so exit
    # early.
    if tf.executing_eagerly():
      return

    partially_known_loc = tf.placeholder_with_default(
        input=np.ones((3, 3), dtype=np.float32), shape=[None, 3])
    mvn_dynamic = tfd.MultivariateNormalDiag(
        loc=partially_known_loc, name="MVN2")
    expected_dynamic_repr = (
        "<tfp.distributions.MultivariateNormalDiag"
        " 'MVN2/'"
        " batch_shape=(?,)"  # Partially known.
        " event_shape=(3,)"
        " dtype=float32>")
    self.assertEqual(repr(mvn_dynamic), expected_dynamic_repr)
Beispiel #17
0
  def _call(self, *args, **kwargs):
    """Entry point when a module is called to connect it to the graph.

    This is the entry point when users connect a Module into the Graph. The
    underlying _build method will have been wrapped in a Template by the
    constructor, and we call this template with the provided inputs here.

    Note we use `_call` instead of `__call__` to allow instance level monkey
    patching (see `defun`).

    Args:
      *args: Arguments for underlying _build method.
      **kwargs: Keyword arguments for underlying _build method.

    Returns:
      The result of the underlying _build method.
    """
    self._check_init_called()
    self._check_same_graph()
    with self._capture_variables():
      outputs, subgraph_name_scope = self._template(*args, **kwargs)
    self._is_connected = True

    if tf.executing_eagerly():
      # In eager mode the module is called a lot more frequently than in graph
      # mode (for each training step) and so we don't keep track of connected
      # subgraphs (since there will be orders of magnitude more of them).
      return outputs

    self._add_connected_subgraph(self._build, outputs, subgraph_name_scope,
                                 *args, **kwargs)
    return outputs
Beispiel #18
0
  def testRegularizers(self, trainable, state_size):
    """Checks that trainable initial states register their regularizers."""
    batch_size = 6

    # Set the attribute on the class since we can't set properties of
    # abstract classes.
    snt.RNNCore.state_size = state_size
    flat_state_size = nest.flatten(state_size)
    num_states = len(flat_state_size)
    core = snt.RNNCore(name="dummy_core")

    # The same L1 regularizer is reused for every entry of the state.
    l1_regularizer = tf.contrib.layers.l1_regularizer(scale=0.5)
    trainable_regularizers = nest.pack_sequence_as(
        structure=state_size, flat_sequence=[l1_regularizer] * num_states)

    core.initial_state(batch_size, dtype=tf.float32, trainable=trainable,
                       trainable_regularizers=trainable_regularizers)

    graph_regularizers = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    if not trainable:
      self.assertFalse(graph_regularizers)
      return

    self.assertEqual(len(graph_regularizers), num_states)
    # The tensor names are only inspected in graph mode.
    if not tf.executing_eagerly():
      for index in range(num_states):
        self.assertRegexpMatches(
            graph_regularizers[index].name, ".*l1_regularizer.*")
Beispiel #19
0
  def test_container_not_supported_in_eager(self):
    """Calling a reuse-wrapped method on a container must fail in eager mode."""
    if not tf.executing_eagerly():
      self.skipTest("Skipping test in graph mode.")

    expected_message = ".* not supported in eager mode .*"
    container = ReuseVarsTest.VariableContainer("name")
    with self.assertRaisesRegexp(ValueError, expected_message):
      container.method_with_reuse()
def _value_and_gradient(fn, *args):
  """Calls `fn` and computes the gradient of the result wrt `args`.

  Args:
    fn: Python callable invoked as `fn(*args)`.
    *args: Positional arguments for `fn`; gradients are taken with respect to
      each of them.

  Returns:
    A `(value, gradients)` pair, where `value` is `fn(*args)` and `gradients`
    holds the gradient of `value` with respect to each entry of `args`.
  """
  if tf.executing_eagerly():
    # Unpack `args` so that `fn` receives exactly the same positional
    # arguments as in graph mode. Passing the tuple itself would hand `fn` a
    # single argument and differentiate with respect to that instead.
    v, g = tfe.value_and_gradients_function(fn)(*args)
  else:
    v = fn(*args)
    g = tf.gradients(v, args)
  return v, g
Beispiel #21
0
  def grad_potential(self, position, check_numerics=True):
    """Get gradient of potential function at current location.

    Args:
      position: Location at which `self.potential` is differentiated.
      check_numerics: Accepted for interface compatibility; this
        implementation does not consult it.

    Returns:
      The gradient of `self.potential(position)` with respect to `position`.
    """
    if not tf.executing_eagerly():
      potential_value = self.potential(position)
      return tf.gradients(potential_value, position)[0]

    grad_fn = tfe.gradients_function(self.potential)
    return grad_fn(position)[0]
Beispiel #22
0
def _convert_to_tensor(value, name=None, preferred_dtype=None):
  """Converts to tensor avoiding an eager bug that loses float precision.

  In eager mode, when an integer or boolean `preferred_dtype` is requested,
  the value is first converted without the preference; if that yields a
  floating tensor it is returned as is.

  Args:
    value: Object to convert.
    name: Optional name forwarded to `tf.convert_to_tensor`.
    preferred_dtype: Optional preferred dtype of the returned tensor.

  Returns:
    The converted `Tensor`.
  """
  # TODO(b/116672045): Remove this function.
  if tf.executing_eagerly() and preferred_dtype is not None:
    if preferred_dtype.is_integer or preferred_dtype.is_bool:
      unhinted = tf.convert_to_tensor(value, name=name)
      if unhinted.dtype.is_floating:
        return unhinted
  return tf.convert_to_tensor(
      value, name=name, preferred_dtype=preferred_dtype)
Beispiel #23
0
  def testGetVariable(self, use_resource):
    """`notify_about_variables` reports variables made by `tf.get_variable`."""
    if tf.executing_eagerly() and not use_resource:
      self.skipTest("Ref variables not supported in eager mode.")

    notified = []
    with util.notify_about_variables(notified.append), \
        tf.variable_scope("", use_resource=use_resource):
      x = tf.get_variable("x", [])

    self.assertVariableType(x, use_resource)
    self.assertEqual(notified, [x])
Beispiel #24
0
 def test_reuse_vars_subgraph_recording(self):
   """Calling a reuse method connects the module and records the subgraph."""
   obj1 = ReuseVarsTest.ModuleReuse(shape=[3, 4], name="scope1")
   self.assertFalse(obj1.is_connected)
   obj1_a_outputs = obj1.a()
   self.assertTrue(obj1.is_connected)

   # The recorded subgraph is only inspected in graph mode.
   if tf.executing_eagerly():
     return
   self.assertEqual(obj1.last_connected_subgraph.name_scope, "scope1/a/")
   self.assertIs(obj1.last_connected_subgraph.module, obj1)
   self.assertEqual(obj1.last_connected_subgraph.inputs, {})
   self.assertIs(obj1.last_connected_subgraph.outputs, obj1_a_outputs)
Beispiel #25
0
def run_with_static_graph():
    """Trains and tests an A2C agent on `env` inside a fresh static graph."""
    static_graph = tf.Graph()
    with static_graph.as_default():
        print(tf.executing_eagerly())  # False

        agent = A2CAgent(Model(num_actions=env.action_space.n))

        # The reward history returned by training is not used here.
        agent.train(env)
        print("Finished training, testing...")
        test_result = agent.test(env)
        print("%d out of 200" % test_result)  # 200 out of 200
  def testCopy(self):
    """Checks `copy` overrides the given arguments and keeps the others."""
    # 5 random index points in R^2
    index_points_1 = np.random.uniform(-4., 4., (5, 2)).astype(np.float32)
    # 10 random index points in R^2
    index_points_2 = np.random.uniform(-4., 4., (10, 2)).astype(np.float32)

    if not self.is_static:
      # Wrap in placeholders without a static shape.
      index_points_1 = tf.placeholder_with_default(index_points_1, shape=None)
      index_points_2 = tf.placeholder_with_default(index_points_2, shape=None)

    mean_fn = lambda x: np.array([0.], np.float32)
    kernel_1 = psd_kernels.ExponentiatedQuadratic()
    kernel_2 = psd_kernels.ExpSinSquared()

    tp1 = tfd.StudentTProcess(
        df=3.,
        kernel=kernel_1,
        index_points=index_points_1,
        mean_fn=mean_fn,
        jitter=1e-5)
    # Override df, index points and kernel; mean_fn and jitter are inherited.
    tp2 = tp1.copy(df=4., index_points=index_points_2, kernel=kernel_2)

    # One event dimension per index point.
    event_shape_1 = [5]
    event_shape_2 = [10]

    self.assertEqual(tp1.mean_fn, tp2.mean_fn)
    self.assertIsInstance(tp1.kernel, psd_kernels.ExponentiatedQuadratic)
    self.assertIsInstance(tp2.kernel, psd_kernels.ExpSinSquared)

    if self.is_static or tf.executing_eagerly():
      self.assertAllEqual(tp1.batch_shape, tp2.batch_shape)
      self.assertAllEqual(tp1.event_shape, event_shape_1)
      self.assertAllEqual(tp2.event_shape, event_shape_2)
      self.assertEqual(self.evaluate(tp1.df), 3.)
      self.assertEqual(self.evaluate(tp2.df), 4.)
      self.assertAllEqual(tp1.index_points, index_points_1)
      self.assertAllEqual(tp2.index_points, index_points_2)
      self.assertAllEqual(
          tf.contrib.util.constant_value(tp1.jitter),
          tf.contrib.util.constant_value(tp2.jitter))
    else:
      self.assertAllEqual(
          self.evaluate(tp1.batch_shape_tensor()),
          self.evaluate(tp2.batch_shape_tensor()))
      self.assertAllEqual(
          self.evaluate(tp1.event_shape_tensor()), event_shape_1)
      self.assertAllEqual(
          self.evaluate(tp2.event_shape_tensor()), event_shape_2)
      self.assertEqual(self.evaluate(tp1.jitter), self.evaluate(tp2.jitter))
      self.assertEqual(self.evaluate(tp1.df), 3.)
      self.assertEqual(self.evaluate(tp2.df), 4.)
      self.assertAllEqual(self.evaluate(tp1.index_points), index_points_1)
      self.assertAllEqual(self.evaluate(tp2.index_points), index_points_2)
Beispiel #27
0
 def testDynamicEventShape(self):
   """An unknown last dimension gives an unknown static event shape."""
   if tf.executing_eagerly():
     return

   def with_unknown_last_dim(values):
     """Wraps `values` in a placeholder whose last dimension is unknown."""
     return tf.placeholder_with_default(input=values, shape=[2, 3, None])

   loc = np.float32(self._rng.rand(2, 3, 2))
   scale_diag = np.float32(self._rng.rand(2, 3, 2))
   mvn = tfd.MultivariateNormalDiag(
       loc=with_unknown_last_dim(loc),
       scale_diag=with_unknown_last_dim(scale_diag))

   self.assertListEqual(mvn.batch_shape.as_list(), [2, 3])
   self.assertListEqual(mvn.event_shape.as_list(), [None])
  def testHalfNormalShapeWithPlaceholders(self):
    """Checks static and dynamic shapes when `scale` has no static shape."""
    if tf.executing_eagerly():
      return
    halfnorm = tfd.HalfNormal(
        scale=tf.placeholder_with_default(input=[1., 2], shape=None))

    # The static batch shape is unknown; the event shape is always scalar.
    self.assertEqual(halfnorm.batch_shape, tf.TensorShape(None))
    self.assertEqual(halfnorm.event_shape, ())

    # The dynamic shapes are available once evaluated.
    event_shape = self.evaluate(halfnorm.event_shape_tensor())
    self.assertAllEqual(event_shape, [])
    batch_shape = self.evaluate(halfnorm.batch_shape_tensor())
    self.assertAllEqual(batch_shape, [2])
 def testInvalidPermException(self):
   """An invalid `perm` is rejected, at construction or at run time."""
   msg = '`perm` must be a valid permutation vector.'

   if not (self.is_static or tf.executing_eagerly()):
     # `perm` is fed through a placeholder, so expect an op error here.
     with self.assertRaisesOpError(msg):
       bijector = tfb.Transpose(
           perm=tf.placeholder_with_default([1, 2], shape=[2]),
           validate_args=True)
       self.evaluate(bijector.forward([[0, 1]]))
     return

   # `perm` is a Python list, so expect a ValueError from the constructor.
   with self.assertRaisesRegexp(ValueError, msg):
     tfb.Transpose(perm=[1, 2], validate_args=True)
Beispiel #30
0
 def testDropout(self):
   """Connecting an MLP with dropout adds a dropout op to the graph."""
   if tf.executing_eagerly():
     self.skipTest("Test not supported when executing eagerly")

   mlp_name = "test_dropout_on_mlp"
   mlp = snt.nets.MLP([1], use_dropout=True, use_bias=False,
                      activate_final=True, name=mlp_name)
   mlp(tf.ones([1, 1]), is_training=True, dropout_keep_prob=0.5)

   graph_ops = tf.get_default_graph().get_operations()
   op_names = [graph_op.name for graph_op in graph_ops]
   self.assertIn("{}_1/dropout/Shape".format(mlp_name), op_names)
Beispiel #31
0
    def _intel_cpu_quantize_weight_eightbit(self,
                                            parent,
                                            input_node,
                                            per_channel,
                                            quantization_mode=b"SCALED"):
        """Quantizes a constant float weight node to qint8 and emits nodes.

        Reads the float tensor stored in `input_node`, quantizes it to int8
        and adds four nodes to the output graph: the qint8 constant, its min
        and max constants, and a `Dequantize` node created with
        `input_node.name`.

        Args:
          parent: Op type of the node the weight belongs to. Supported values
            are "Conv2D", "MatMul" and "DepthwiseConv2dNative".
          input_node: Constant node whose "value" attr holds the float weights.
          per_channel: For "Conv2D"/"MatMul", whether ranges are computed by
            reducing over axes (0, 1, 2) (presumably one range per output
            channel of a 4-D filter - confirm for "MatMul") rather than over
            the whole tensor. Not consulted for "DepthwiseConv2dNative".
          quantization_mode: Mode passed to the `quantize_v2` op in the
            whole-tensor path. The emitted `Dequantize` node always uses
            b"SCALED".

        Raises:
          ValueError: If `parent` is not one of the supported op types.
        """
        base_name = input_node.name + "_"
        qint8_const_name = base_name + "qint8_const"
        min_name = base_name + "min"
        max_name = base_name + "max"
        float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor)
        epsilon = 1e-4  # Needs to be set empirically if accuracy is not satisfactory
        if parent in ("Conv2D", "MatMul"):
            if per_channel:
                ranges = np.abs(float_tensor).max(axis=(0, 1, 2))
                min_value = -ranges
                max_value = ranges
                # nudging min-max values outside epsilon radius around zero
                ranges[ranges < epsilon] = epsilon
                min_value[np.abs(min_value) < epsilon] = -epsilon
                max_value[np.abs(max_value) < epsilon] = epsilon
                qint8_tensor = (float_tensor * 127.0 / ranges).astype(np.int8)
            else:
                min_value = np.min(float_tensor.flatten())
                max_value = np.max(float_tensor.flatten())
                # Same processing of min-max as in quantize_weight_eightbit
                # function.
                if min_value > 0.0:
                    min_value = 0.0
                if min_value == max_value:
                    if abs(min_value) < 0.000001:
                        max_value = min_value + 1.0
                    elif min_value > 0:
                        max_value = 2 * min_value
                    else:
                        max_value = min_value / 2.0

                def _materialize(tensor):
                    """Returns the concrete value of `tensor` in either mode."""
                    if tf.executing_eagerly():
                        return tensor.numpy()
                    return tensor.eval()

                sess = tf.compat.v1.Session()
                try:
                    with sess.as_default():
                        quantize_op = array_ops.quantize_v2(
                            float_tensor,
                            min_value,
                            max_value,
                            dtypes.qint8,
                            mode=quantization_mode,
                            round_mode="HALF_TO_EVEN")
                        qint8_tensor = _materialize(quantize_op[0])
                        # Updated min-max values should be passed to the next
                        # feeding node.
                        min_value = _materialize(quantize_op[1])
                        max_value = _materialize(quantize_op[2])
                finally:
                    # Release the session even if quantization raised.
                    sess.close()
        elif parent == "DepthwiseConv2dNative":
            # get the max values based on dim 0 and 1 for depthwise conv
            # since, the output channel will be dim 2 * dim 3
            ranges = np.abs(float_tensor).max(axis=(0, 1))
            ranges = ranges.flatten()
            min_value = -ranges
            max_value = ranges
            # nudging min-max values outside epsilon radius around zero
            ranges[ranges < epsilon] = epsilon
            min_value[np.abs(min_value) < epsilon] = -epsilon
            max_value[np.abs(max_value) < epsilon] = epsilon
            # Since output channel will be 1 dim which is dim 2 * dim 3
            # When divide by range, qint8_tensor needs to be 3 dim
            # where, 3rd dim should be same dim of ranges
            a, b, c, d = float_tensor.shape
            qint8_tensor = (float_tensor.reshape(a, b, c * d) * 127.0 /
                            ranges).astype(np.int8)
            # get the shape back to 4 dim
            qint8_tensor = qint8_tensor.reshape(a, b, c, d)
        else:
            # Without this guard the code below would fail with an obscure
            # UnboundLocalError on `qint8_tensor`.
            raise ValueError(
                "Unsupported parent op type for weight quantization: "
                "{!r}".format(parent))
        shape = tensor_util.TensorShapeProtoToList(
            input_node.attr["value"].tensor.tensor_shape)
        qint8_const_node = helper.create_constant_node(qint8_const_name,
                                                       qint8_tensor,
                                                       dtypes.qint8,
                                                       shape=shape)

        min_node = helper.create_constant_node(min_name,
                                               min_value,
                                               dtypes.float32,
                                               device=self.device)

        max_node = helper.create_constant_node(max_name,
                                               max_value,
                                               dtypes.float32,
                                               device=self.device)

        dequantize_node = helper.create_node(
            "Dequantize", input_node.name,
            [qint8_const_name, min_name, max_name])

        helper.set_attr_dtype(dequantize_node, "T", dtypes.qint8)
        helper.set_attr_string(dequantize_node, "mode", b"SCALED")
        self.add_output_graph_node(qint8_const_node)
        self.add_output_graph_node(min_node)
        self.add_output_graph_node(max_node)
        self.add_output_graph_node(dequantize_node)
Beispiel #32
0
def has_eager_been_enabled():
    """Tells whether eager execution is switched on.

    Returns:
      The result of `tf.executing_eagerly()` evaluated inside
      `tf.init_scope()`, i.e. true iff in TF2 or in TF1 with eager execution
      enabled.
    """
    with tf.init_scope():
        eager_enabled = tf.executing_eagerly()
    return eager_enabled
Beispiel #33
0
    def create_eval_metrics(self, features, labels, estimator_spec, metric_fn):
        """Creates evaluation metrics from the given arguments.

        Three groups of metrics are registered with `self._eval_metrics_store`,
        in this order: the metrics carried by `estimator_spec`, a mean-loss
        metric stored under the key "loss", and finally the metrics produced
        by the user supplied `metric_fn` (if any).

        Args:
          features: Input `dict` of `Tensor` objects.
          labels: Labels `Tensor` or a dictionary of string label name to
            `Tensor` (for multi-head).
          estimator_spec: The `EstimatorSpec` created by a `Head` instance.
          metric_fn: A function which should obey the following signature:
          - Args: can only have following three arguments in any order:
            * predictions: Predictions `Tensor` or dict of `Tensor` created by
              given `Head`.
            * features: Input `dict` of `Tensor` objects created by `input_fn`
              which is given to `estimator.evaluate` as an argument.
            * labels:  Labels `Tensor` or dict of `Tensor` (for multi-head)
              created by `input_fn` which is given to `estimator.evaluate` as
              an argument.
          - Returns: Dict of metric results keyed by name. Final metrics are a
            union of this and `estimator`s existing metrics. If there is a
            name conflict between this and `estimator`s existing metrics, this
            will override the existing one. The values of the dict are the
            results of calling a metric function, namely a
            `(metric_tensor, update_op)` tuple.
        """

        # TODO: Create CPU eval metrics non-lazily, similar to summaries.py.

        # If estimator_spec is not a TPUEstimatorSpec we create dummy metric_fn
        # and args.
        if isinstance(estimator_spec, tf.estimator.EstimatorSpec):
            spec_fn, spec_args = lambda: estimator_spec.eval_metric_ops, []
        else:
            spec_fn, spec_args = estimator_spec.eval_metrics
        self._eval_metrics_store.add_eval_metrics(
            self._templatize_metric_fn(spec_fn), spec_args)

        # Build the mean-loss metric once up front; which API is used depends
        # on the TF version and on whether eager execution is active.
        if tf_compat.version_greater_or_equal(
                "1.13.0") and tf.executing_eagerly():
            loss_metrics = tf.keras.metrics.Mean("mean_loss")
            loss_metrics(estimator_spec.loss)
        else:
            loss_metrics = tf_compat.v1.metrics.mean(estimator_spec.loss)

        def loss_fn(loss):
            # On TPU the metric is rebuilt from the `loss` argument; otherwise
            # the metric created above is reused.
            if self._use_tpu:
                return {"loss": tf_compat.v1.metrics.mean(loss)}
            return {"loss": loss_metrics}

        loss_fn_args = [tf.reshape(estimator_spec.loss, [1])]
        self._eval_metrics_store.add_eval_metrics(
            self._templatize_metric_fn(loss_fn), loss_fn_args)

        # NOTE: the user supplied metrics_fn must be added last. This is because we
        # want user metrics to override AdaNet's metrics.
        if metric_fn:
            metric_fn_args = {}
            # `inspect.getargspec` raises ValueError for annotated or
            # keyword-only signatures on Python 3 and no longer exists as of
            # Python 3.11. Prefer `getfullargspec`; the legacy helper is only
            # looked up when the modern one is missing (Python 2).
            get_argspec = (getattr(inspect, "getfullargspec", None)
                           or inspect.getargspec)
            argspec = get_argspec(metric_fn).args
            # Only pass the arguments that `metric_fn` actually declares.
            if "features" in argspec:
                metric_fn_args["features"] = features
            if "labels" in argspec:
                metric_fn_args["labels"] = labels
            if "predictions" in argspec:
                metric_fn_args["predictions"] = estimator_spec.predictions
            additional_metrics = call_eval_metrics((metric_fn, metric_fn_args))

            def additional_metrics_fn(**kwargs):
                # Same split as `loss_fn`: recompute from the passed kwargs on
                # TPU, otherwise return the metrics computed above.
                if self._use_tpu:
                    return call_eval_metrics((metric_fn, kwargs))
                return additional_metrics

            self._eval_metrics_store.add_eval_metrics(
                self._templatize_metric_fn(additional_metrics_fn),
                metric_fn_args)
Beispiel #34
0
  def __init__(self,
               learning_rate,
               preconditioner_decay_rate=0.95,
               data_size=1,
               burnin=25,
               diagonal_bias=1e-8,
               name=None,
               parallel_iterations=10,
               variable_scope=None):
    """Builds the optimizer's tensors, validation ops and step counter.

    Args:
      learning_rate: Value converted to a tensor and stored as the learning
        rate.
      preconditioner_decay_rate: Value converted to a tensor; guarded by
        assertions that it is non-negative and at most 1.
      data_size: Value converted to a tensor; guarded by an assertion that it
        is greater than zero.
      burnin: Value converted to a tensor; guarded by assertions that it is a
        non-negative integer.
      diagonal_bias: Value converted to a tensor; guarded by an assertion that
        it is non-negative.
      name: Optional name for the name scope and for the optimizer; the
        default name 'StochasticGradientLangevinDynamics' is used when unset.
      parallel_iterations: Stored unchanged on the instance.
      variable_scope: Variable scope in which the `counter` variable is
        created. When `None`, a new uniquely named scope is opened.

    Raises:
      NotImplementedError: If eager execution is enabled.
    """
    default_name = 'StochasticGradientLangevinDynamics'
    scope_values = [
        learning_rate, preconditioner_decay_rate, data_size, burnin,
        diagonal_bias
    ]
    with tf.name_scope(name, default_name, scope_values):
      if tf.executing_eagerly():
        raise NotImplementedError('Eager execution currently not supported for '
                                  ' SGLD optimizer.')

      # Reuse the caller's variable scope when given, otherwise open a new one
      # under a graph-unique name.
      if variable_scope is not None:
        self._variable_scope = variable_scope
      else:
        unique_scope_name = tf.get_default_graph().unique_name(
            name or default_name)
        with tf.variable_scope(unique_scope_name) as fresh_scope:
          self._variable_scope = fresh_scope

      # Turn every hyperparameter into a tensor first; the validated versions
      # are attached further below.
      self._preconditioner_decay_rate = tf.convert_to_tensor(
          preconditioner_decay_rate, name='preconditioner_decay_rate')
      self._data_size = tf.convert_to_tensor(data_size, name='data_size')
      self._burnin = tf.convert_to_tensor(burnin, name='burnin')
      self._diagonal_bias = tf.convert_to_tensor(
          diagonal_bias, name='diagonal_bias')
      self._learning_rate = tf.convert_to_tensor(
          learning_rate, name='learning_rate')
      self._parallel_iterations = parallel_iterations

      with tf.variable_scope(self._variable_scope):
        self._counter = tf.get_variable(
            'counter', initializer=0, trainable=False)

      # Each tensor is replaced by a copy that depends on its assertions, so
      # the checks run whenever the value is consumed.
      decay_rate_checks = [
          tf.assert_non_negative(
              self._preconditioner_decay_rate,
              message='`preconditioner_decay_rate` must be non-negative'),
          tf.assert_less_equal(
              self._preconditioner_decay_rate,
              1.,
              message='`preconditioner_decay_rate` must be at most 1.'),
      ]
      self._preconditioner_decay_rate = control_flow_ops.with_dependencies(
          decay_rate_checks, self._preconditioner_decay_rate)

      data_size_checks = [
          tf.assert_greater(
              self._data_size,
              0,
              message='`data_size` must be greater than zero')
      ]
      self._data_size = control_flow_ops.with_dependencies(
          data_size_checks, self._data_size)

      burnin_checks = [
          tf.assert_non_negative(
              self._burnin, message='`burnin` must be non-negative'),
          tf.assert_integer(
              self._burnin, message='`burnin` must be an integer')
      ]
      self._burnin = control_flow_ops.with_dependencies(
          burnin_checks, self._burnin)

      diagonal_bias_checks = [
          tf.assert_non_negative(
              self._diagonal_bias,
              message='`diagonal_bias` must be non-negative')
      ]
      self._diagonal_bias = control_flow_ops.with_dependencies(
          diagonal_bias_checks, self._diagonal_bias)

      super(StochasticGradientLangevinDynamics, self).__init__(
          use_locking=False, name=name or default_name)
  def _testMVN(self,
               base_distribution_class,
               base_distribution_kwargs,
               batch_shape=(),
               event_shape=(),
               not_implemented_message=None):
    """Checks a shape-overridden, Affine-transformed distribution against MVN.

    Two instances of the class returned by `self._cls()` are built around the
    same base distribution and `tfb.Affine` bijector: one receives the
    overriding shapes as placeholders with unknown static shape, the other
    receives them as plain Python values. Samples, log-probabilities,
    probabilities, means and entropies are compared with reference values
    computed with NumPy / SciPy from `self._shift` and `self._tril`.

    Args:
      base_distribution_class: Callable used to build the base distribution;
        it is called with `validate_args=True` plus
        `base_distribution_kwargs`.
      base_distribution_kwargs: `dict` of extra keyword arguments for
        `base_distribution_class`.
      batch_shape: Overriding batch shape handed to the distribution under
        test.
      event_shape: Overriding event shape handed to the distribution under
        test.
      not_implemented_message: Regular expression the `NotImplementedError`
        raised by the cdf-style methods is expected to match.
    """
    # Overriding shapes must be compatible w/bijector; most bijectors are
    # batch_shape agnostic and only care about event_ndims.
    # In the case of `Affine`, if we got it wrong then it would fire an
    # exception due to incompatible dimensions.
    # `shape=None` leaves the static shape of both placeholders unknown.
    batch_shape_pl = tf.compat.v1.placeholder_with_default(
        input=np.int32(batch_shape), shape=None, name="dynamic_batch_shape")
    event_shape_pl = tf.compat.v1.placeholder_with_default(
        input=np.int32(event_shape), shape=None, name="dynamic_event_shape")
    # Variant whose overriding shapes are only known at run time.
    fake_mvn_dynamic = self._cls()(
        distribution=base_distribution_class(
            validate_args=True, **base_distribution_kwargs),
        bijector=tfb.Affine(shift=self._shift, scale_tril=self._tril),
        batch_shape=batch_shape_pl,
        event_shape=event_shape_pl,
        validate_args=True)

    # Variant whose overriding shapes are given as plain Python values.
    fake_mvn_static = self._cls()(
        distribution=base_distribution_class(
            validate_args=True, **base_distribution_kwargs),
        bijector=tfb.Affine(shift=self._shift, scale_tril=self._tril),
        batch_shape=batch_shape,
        event_shape=event_shape,
        validate_args=True)

    # Reference moments: the mean is the shift repeated once per batch member,
    # the covariance is tril @ tril^T computed per batch member.
    actual_mean = np.tile(self._shift, [2, 1])  # Affine elided this tile.
    actual_cov = np.matmul(self._tril, np.transpose(self._tril, [0, 2, 1]))

    def actual_mvn_log_prob(x):
      # Evaluates SciPy's MVN log-density separately for each batch member `i`
      # on the slice `x[:, i, :]`; the transpose puts the batch axis last.
      return np.concatenate([[
          stats.multivariate_normal(actual_mean[i],
                                    actual_cov[i]).logpdf(x[:, i, :])
      ] for i in range(len(actual_cov))]).T

    # Reference entropy, one value per batch member.
    actual_mvn_entropy = np.concatenate(
        [[stats.multivariate_normal(actual_mean[i], actual_cov[i]).entropy()]
         for i in range(len(actual_cov))])

    self.assertAllEqual([3], fake_mvn_static.event_shape)
    self.assertAllEqual([2], fake_mvn_static.batch_shape)

    # The unknown static shapes of the dynamic variant are only asserted when
    # not executing eagerly.
    if not tf.executing_eagerly():
      self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.event_shape)
      self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.batch_shape)

    # The cdf-style methods are expected to raise NotImplementedError.
    x = self.evaluate(fake_mvn_static.sample(5, seed=0))
    for unsupported_fn in (fake_mvn_static.log_cdf, fake_mvn_static.cdf,
                           fake_mvn_static.survival_function,
                           fake_mvn_static.log_survival_function):
      with self.assertRaisesRegexp(NotImplementedError,
                                   not_implemented_message):
        unsupported_fn(x)

    # Kept as a float because it is also the divisor of the covariance
    # estimate below; it is cast to int where a sample count is required.
    num_samples = 7e3
    for fake_mvn in [fake_mvn_static, fake_mvn_dynamic]:
      # Ensure sample works by checking first, second moments.
      y = fake_mvn.sample(int(num_samples), seed=0)
      x = y[0:5, ...]
      sample_mean = tf.reduce_mean(input_tensor=y, axis=0)
      # Move the sample axis last so the matmul below contracts over samples.
      centered_y = tf.transpose(a=y - sample_mean, perm=[1, 2, 0])
      sample_cov = tf.matmul(
          centered_y, centered_y, transpose_b=True) / num_samples
      # Evaluate everything in a single call so all values stem from the same
      # drawn samples.
      [
          sample_mean_,
          sample_cov_,
          x_,
          fake_event_shape_,
          fake_batch_shape_,
          fake_log_prob_,
          fake_prob_,
          fake_mean_,
          fake_entropy_,
      ] = self.evaluate([
          sample_mean,
          sample_cov,
          x,
          fake_mvn.event_shape_tensor(),
          fake_mvn.batch_shape_tensor(),
          fake_mvn.log_prob(x),
          fake_mvn.prob(x),
          fake_mvn.mean(),
          fake_mvn.entropy(),
      ])

      # Sample moments are noisy estimates, hence the loose tolerances.
      self.assertAllClose(actual_mean, sample_mean_, atol=0.1, rtol=0.1)
      self.assertAllClose(actual_cov, sample_cov_, atol=0., rtol=0.1)

      # Ensure all other functions work as intended.
      self.assertAllEqual([5, 2, 3], x_.shape)
      self.assertAllEqual([3], fake_event_shape_)
      self.assertAllEqual([2], fake_batch_shape_)
      self.assertAllClose(
          actual_mvn_log_prob(x_), fake_log_prob_, atol=0., rtol=1e-6)
      self.assertAllClose(
          np.exp(actual_mvn_log_prob(x_)), fake_prob_, atol=0., rtol=1e-5)
      self.assertAllClose(actual_mean, fake_mean_, atol=0., rtol=1e-6)
      self.assertAllClose(actual_mvn_entropy, fake_entropy_, atol=0., rtol=1e-6)
Beispiel #36
0
 def static_value(self, t):
     """Returns `t.numpy()` under eager execution and `None` otherwise."""
     if not tf.executing_eagerly():
         return None
     return t.numpy()
 def setUp(self):
     """Resets graph and Keras session state, then disables eager execution.

     The resulting value of `tf.executing_eagerly()` is printed afterwards.
     """
     tf_v1 = tf.compat.v1
     tf_v1.reset_default_graph()
     keras.backend.clear_session()
     tf_v1.disable_eager_execution()
     eager_flag = tf.executing_eagerly()
     print("Eager Execution:", eager_flag)
Beispiel #38
0
    def _initialize_params(
        self,
        model: KERAS_MODEL_TYPE,
        use_logits: bool,
        input_layer: int,
        output_layer: int,
    ) -> None:
        """
        Initialize most parameters of the classifier. This is a convenience function called by `__init__` and
        `__setstate__` to avoid code duplication.

        The method selects the input and output tensors of the model, infers the number of classes and the input
        shape, resolves the model's loss into a callable, and builds the loss tensor together with a backend function
        that evaluates the gradient of the loss with respect to the input.

        :param model: Keras model
        :param use_logits: True if the output of the model are logits.
        :param input_layer: Which layer to consider as the Input when the model has multiple input layers.
        :param output_layer: Which layer to consider as the Output when the model has multiple output layers.
        :raises ValueError: If TensorFlow is executing eagerly, or if the loss function is not recognised.
        :raises NotImplementedError: If the Keras backend is `cntk`.
        """
        # Pick the Keras implementation: the one bundled with TensorFlow or the standalone package.
        # pylint: disable=E0401
        if self.is_tensorflow:
            import tensorflow as tf  # lgtm [py/repeated-import]

            if tf.executing_eagerly():
                raise ValueError(
                    "TensorFlow is executing eagerly. Please disable eager execution."
                )
            import tensorflow.keras as keras
            import tensorflow.keras.backend as k
        else:
            import keras  # lgtm [py/repeated-import]
            import keras.backend as k

        # Select the input tensor; models without an `inputs` attribute fall back to `model.input` and index 0.
        if hasattr(model, "inputs"):
            self._input_layer = input_layer
            self._input = model.inputs[input_layer]
        else:
            self._input = model.input
            self._input_layer = 0

        # Same selection logic for the output tensor.
        if hasattr(model, "outputs"):
            self._output = model.outputs[output_layer]
            self._output_layer = output_layer
        else:
            self._output = model.output
            self._output_layer = 0

        # The two-way unpacking requires the output shape to have exactly two entries; the second one is taken as the
        # number of classes. The input shape drops its first entry.
        _, self._nb_classes = k.int_shape(self._output)
        self._input_shape = k.int_shape(self._input)[1:]
        logger.debug(
            "Inferred %i classes and %s as input shape for Keras classifier.",
            self.nb_classes,
            str(self.input_shape),
        )

        self._use_logits = use_logits

        # Get loss function
        # NOTE(review): the loss is read from `self._model`, not from the `model` argument - presumably the caller
        # has stored the same model on the instance beforehand; confirm.
        if not hasattr(self._model, "loss"):
            logger.warning(
                "Keras model has no loss set. Classifier tries to use `k.sparse_categorical_crossentropy`."
            )
            loss_function = k.sparse_categorical_crossentropy
        else:

            # Loss given by name: look it up on the backend module.
            if isinstance(self._model.loss, six.string_types):
                loss_function = getattr(k, self._model.loss)

            # Loss given as one of the known named functions: hinge and KL divergence are taken from `keras.losses`,
            # the cross-entropies from `keras.backend`.
            elif "__name__" in dir(
                    self._model.loss) and self._model.loss.__name__ in [
                        "categorical_hinge",
                        "categorical_crossentropy",
                        "sparse_categorical_crossentropy",
                        "binary_crossentropy",
                        "kullback_leibler_divergence",
                    ]:
                if self._model.loss.__name__ in [
                        "categorical_hinge",
                        "kullback_leibler_divergence",
                ]:
                    loss_function = getattr(keras.losses,
                                            self._model.loss.__name__)
                else:
                    loss_function = getattr(keras.backend,
                                            self._model.loss.__name__)

            # Loss given as an instance of a known loss class: use the instance itself.
            elif isinstance(
                    self._model.loss,
                (
                    keras.losses.CategoricalHinge,
                    keras.losses.CategoricalCrossentropy,
                    keras.losses.SparseCategoricalCrossentropy,
                    keras.losses.BinaryCrossentropy,
                    keras.losses.KLDivergence,
                ),
            ):
                loss_function = self._model.loss
            # Anything else: try to find a backend function with the same name.
            else:
                loss_function = getattr(k, self._model.loss.__name__)

        # Check if loss function is an instance of loss function generator, the try is required because some of the
        # modules are not available in older Keras versions
        try:
            flag_is_instance = isinstance(
                loss_function,
                (
                    keras.losses.CategoricalHinge,
                    keras.losses.CategoricalCrossentropy,
                    keras.losses.BinaryCrossentropy,
                    keras.losses.KLDivergence,
                ),
            )
        except AttributeError:
            flag_is_instance = False

        # Check if the labels have to be reduced to index labels and create placeholder for labels
        # The first branch keeps labels as they are and uses a placeholder shaped like the output; the sparse branch
        # marks labels for reduction and uses a placeholder of shape `[None]`.
        if ("__name__" in dir(loss_function) and loss_function.__name__ in [
                "categorical_hinge",
                "categorical_crossentropy",
                "binary_crossentropy",
                "kullback_leibler_divergence",
        ]) or (self.is_tensorflow and flag_is_instance):
            self._reduce_labels = False
            label_ph = k.placeholder(shape=self._output.shape)
        elif ("__name__" in dir(loss_function) and loss_function.__name__
              in ["sparse_categorical_crossentropy"]) or isinstance(
                  loss_function, keras.losses.SparseCategoricalCrossentropy):
            self._reduce_labels = True
            label_ph = k.placeholder(shape=[
                None,
            ])
        else:
            raise ValueError("Loss function not recognised.")

        # Define the loss using the loss function
        # Only the named cross-entropy functions are called with `from_logits`; all other losses are called with
        # labels and output only.
        if "__name__" in dir(loss_function, ) and loss_function.__name__ in [
                "categorical_crossentropy",
                "sparse_categorical_crossentropy",
                "binary_crossentropy",
        ]:
            loss_ = loss_function(label_ph,
                                  self._output,
                                  from_logits=self._use_logits)

        elif "__name__" in dir(loss_function) and loss_function.__name__ in [
                "categorical_hinge",
                "kullback_leibler_divergence",
        ]:
            loss_ = loss_function(label_ph, self._output)

        elif isinstance(
                loss_function,
            (
                keras.losses.CategoricalHinge,
                keras.losses.CategoricalCrossentropy,
                keras.losses.SparseCategoricalCrossentropy,
                keras.losses.KLDivergence,
                keras.losses.BinaryCrossentropy,
            ),
        ):
            loss_ = loss_function(label_ph, self._output)

        # Define loss gradients
        loss_gradients = k.gradients(loss_, self._input)

        # With the TensorFlow backend the first element of the gradients result is used.
        if k.backend() == "tensorflow":
            loss_gradients = loss_gradients[0]
        elif k.backend() == "cntk":
            raise NotImplementedError(
                "Only TensorFlow is supported as backend for Keras.")

        # Set loss, gradients and prediction functions
        self._predictions_op = self._output
        self._loss = loss_
        self._loss_gradients = k.function([self._input, label_ph],
                                          [loss_gradients])

        # Get the internal layer
        self._layer_names = self._get_layers()
Beispiel #39
0
import logging
import os
import time
import warnings

# Set the native TensorFlow log threshold before the library is imported so
# the value is already in place when TensorFlow loads.
# NOTE(review): level "2" presumably hides INFO and WARNING output - confirm
# against the TensorFlow version in use.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

# `warnings.catch_warnings()` restores the previous filters when the block
# exits, so the "ignore" filter only affects code that runs inside it. The
# TensorFlow import therefore has to happen here for FutureWarnings emitted
# during the import to be suppressed.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=FutureWarning)
    import tensorflow as tf

# Silence TensorFlow's Python logger for everything below FATAL.
logging.getLogger('tensorflow').setLevel(logging.FATAL)

# Name of the CSV file used for statistics.
stats_file = "tf_stats_data_parallel_mnist.csv"

# NOTE(review): the return value is discarded, so this statement only queries
# the eager state - confirm whether enabling or printing it was intended.
tf.executing_eagerly()

# Load the MNIST train and test splits.
(mnist_train_images, mnist_train_labels), (
    mnist_test_images,
    mnist_test_labels) = tf.keras.datasets.mnist.load_data()

sequential_mini_batch_size = 600
# picking constant mini-batch size
mini_batch_size = 25

print(mnist_train_images.shape, mnist_train_labels.shape,
      mnist_test_images.shape, mnist_test_labels.shape)

# Local aliases of the full training set.
mnist_train_images_local = mnist_train_images
mnist_train_labels_local = mnist_train_labels
Beispiel #40
0
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np

# Banner marking the start of this part of the script.
banner_edge = '*' * 50
print(banner_edge + ' Parte 2 ' + banner_edge)

# Report the TensorFlow version in use and whether it executes eagerly.
print('TF version:', tf.__version__)
print(tf.executing_eagerly())

# Load the MNIST training and test data.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# There are 10 labels (0:Zero ... 9:Nine) and each image maps to exactly one
# of them. Class names are not part of the dataset, so they are spelled out
# here in label order.
labels = 'Zero One Two Three Four Five Six Seven Eight Nine'.split()

# Print the shape and cardinality of each split.
for caption, figure in (
        ('Train set shape', x_train.shape),
        ('Train labels shape', y_train.shape),
        ('Test set shape', x_test.shape),
        ('Test labels shape', y_test.shape),
        ('Number of training samples', len(x_train)),
        ('Number of testing samples', len(x_test)),
):
    print(caption, figure)

#show a figure
Beispiel #41
0
    def __init__(self, filename, columns=None, internal=True):
        """Builds a dataset that reads columns of a Parquet file.

        Column names, shapes and dtypes are obtained from
        `core_ops.io_parquet_readable_info`. One dataset per selected column
        is created and all of them are zipped into an ordered dict keyed by
        column name.

        Args:
          filename: Passed to the readable ops both as the input and as the
            `shared` name.
          columns: Optional selection of columns. In graph execution this must
            be a `dict`; its values are used as dtypes (either a
            `tf.dtypes.DType` or an object with a `.dtype` attribute). In
            eager execution it may be a `dict` or any iterable of column
            names, or `None` to select every column reported for the file.
          internal: Must be truthy; the constructor asserts on it.

        Raises:
          ValueError: In graph execution when `columns` is not a `dict`.
        """
        assert internal
        with tf.name_scope("ParquetIODataset"):
            # Parallel tensors describing every column found in the file.
            components, shapes, dtypes = core_ops.io_parquet_readable_info(
                filename, shared=filename, container="ParquetIODataset"
            )

            def component_f(components, column):
                # Entry of `components` that equals `column`.
                component = tf.boolean_mask(
                    components, tf.math.equal(components, column)
                )[0]
                return component

            def shape_f(shapes, components, column):
                # Shape row belonging to `column`, with negative entries
                # removed.
                shape = tf.boolean_mask(shapes, tf.math.equal(components, column))[0]
                shape = tf.boolean_mask(shape, tf.math.greater_equal(shape, 0))
                return shape

            def dtype_f(dtypes, components, column):
                # Dtype belonging to `column`. Relies on `.numpy()` and is only
                # called from the eager branch below.
                dtype = tf.boolean_mask(dtypes, tf.math.equal(components, column))[0]
                dtype = tf.as_dtype(dtype.numpy())
                return dtype

            # NOTE: in each branch `shapes`, `dtypes` and `components` are
            # rebound one after another; later lines still read the values the
            # earlier ones have not replaced yet, so the order matters.
            if not tf.executing_eagerly():
                # Graph execution: dtypes come from the user supplied specs.
                if columns is None or not isinstance(columns, dict):
                    raise ValueError(
                        "The `columns` parameter can only be "
                        "a dictionary in graph execution, mapping "
                        "feature names to `tf.TensorSpec`."
                    )
                shapes = [shape_f(shapes, components, column) for column in columns]
                dtypes = [
                    spec if isinstance(spec, tf.dtypes.DType) else spec.dtype
                    for column, spec in columns.items()
                ]
                components = [component_f(components, column) for column in columns]
                column_names = list(columns.keys())
            elif columns is not None:
                # Eager execution with an explicit column selection: shapes and
                # dtypes are looked up in the file metadata.
                shapes = [shape_f(shapes, components, column) for column in columns]
                dtypes = [dtype_f(dtypes, components, column) for column in columns]
                components = (
                    list(columns.keys()) if isinstance(columns, dict) else columns
                )
                column_names = components
            else:
                # Eager execution without a selection: use every column.
                shapes = tf.unstack(shapes)
                dtypes = [tf.as_dtype(dtype.numpy()) for dtype in tf.unstack(dtypes)]
                components = [component.numpy() for component in tf.unstack(components)]
                column_names = components

            self._filename = filename
            self._components = components
            self._shapes = shapes
            self._dtypes = dtypes

            def dataset_f(component, shape, dtype):
                # Dataset for a single column, read in slices of `step`
                # entries along the first dimension.
                step = 4096
                indices_start = tf.data.Dataset.range(0, shape[0], step)
                # Each stop index is the next start index; the final stop is
                # `shape[0]`.
                indices_stop = indices_start.skip(1).concatenate(
                    tf.data.Dataset.from_tensor_slices(
                        tf.convert_to_tensor([shape[0]], tf.int64)
                    )
                )
                dataset = tf.data.Dataset.zip((indices_start, indices_stop))

                def f(start, stop):
                    # Reads the entries in `[start, stop)` of this column.
                    return core_ops.io_parquet_readable_read(
                        input=self._filename,
                        shared=self._filename,
                        component=component,
                        shape=shape,
                        start=start,
                        stop=stop,
                        dtype=dtype,
                        container="ParquetIODataset",
                    )

                dataset = dataset.map(f)
                # Flatten the slices back into individual entries.
                dataset = dataset.unbatch()
                return dataset

            entries = list(zip(components, shapes, dtypes))
            datasets = [
                dataset_f(component, shape, dtype)
                for component, shape, dtype in entries
            ]
            # Combine the per-column datasets into one dataset whose elements
            # are ordered dicts keyed by column name.
            self._dataset = tf.data.Dataset.zip(
                collections.OrderedDict(list(zip(column_names, datasets)))
            )

            # Override the default `element_spec` with given specs if available.
            if isinstance(columns, dict) and all(
                isinstance(val, tf.TensorSpec) for val in columns.values()
            ):
                self._element_spec = collections.OrderedDict(columns)
            else:
                self._element_spec = None

            super().__init__(
                self._dataset._variant_tensor
            )  # pylint: disable=protected-access
def run_customized_training_loop(
        # pylint: disable=invalid-name
        _sentinel=None,
        # pylint: enable=invalid-name
        strategy=None,
        model_fn=None,
        loss_fn=None,
        scale_loss=True,
        model_dir=None,
        train_input_fn=None,
        steps_per_epoch=None,
        steps_per_loop=None,
        epochs=1,
        eval_input_fn=None,
        eval_steps=None,
        metric_fn=None,
        init_checkpoint=None,
        custom_callbacks=None,
        run_eagerly=False,
        sub_model_export_name=None,
        explicit_allreduce=False,
        pre_allreduce_callbacks=None,
        post_allreduce_callbacks=None,
        train_summary_interval=0):
    """Run BERT pretrain model training using low-level API.

    Arguments:
        _sentinel: Used to prevent positional parameters. Internal, do not
            use.
        strategy: Distribution strategy on which to run low level training
            loop.
        model_fn: Function that returns a tuple (model, sub_model). Caller of
            this function should add optimizer to the `model` via calling
            `model.compile()` API or manually setting `model.optimizer`
            attribute. Second element of the returned tuple (sub_model) is an
            optional sub model to be used for initial checkpoint -- if
            provided.
        loss_fn: Function with signature func(labels, logits) and returns a
            loss tensor.
        scale_loss: Whether to divide the raw loss by number of replicas
            before gradients calculation.
        model_dir: Model directory used during training for restoring/saving
            model weights.
        train_input_fn: Function that returns a tf.data.Dataset used for
            training.
        steps_per_epoch: Number of steps to run per epoch. At the end of each
            epoch, model checkpoint will be saved and evaluation will be
            conducted if evaluation dataset is provided.
        steps_per_loop: Number of steps per graph-mode loop. In order to
            reduce communication in eager context, training logs are printed
            every steps_per_loop. When not set, it defaults to
            `min(1000, steps_per_epoch)` if a TPU logical device is listed
            and to 1 otherwise; values larger than `steps_per_epoch` are
            clamped to `steps_per_epoch`.
        epochs: Number of epochs to train.
        eval_input_fn: Function that returns evaluation dataset. If none,
            evaluation is skipped.
        eval_steps: Number of steps to run evaluation. Required if
            `eval_input_fn` is not none.
        metric_fn: A metrics function that returns a Keras Metric object to
            record evaluation result using evaluation dataset or with
            training dataset after every epoch.
        init_checkpoint: Optional checkpoint to load to `sub_model` returned
            by `model_fn`.
        custom_callbacks: A list of Keras Callbacks objects to run during
            training. More specifically, `on_batch_begin()`,
            `on_batch_end()`, `on_epoch_begin()`, `on_epoch_end()` methods
            are invoked during training. Note that some metrics may be
            missing from `logs`.
        run_eagerly: Whether to run model training in pure eager execution.
            This should be disabled for TPUStrategy.
        sub_model_export_name: If not None, will export `sub_model` returned
            by `model_fn` into checkpoint files. The name of intermediate
            checkpoint file is {sub_model_export_name}_step_{step}.ckpt and
            the last checkpoint's name is {sub_model_export_name}.ckpt;
            if None, `sub_model` will not be exported as checkpoint.
        explicit_allreduce: Whether to explicitly perform gradient allreduce,
            instead of relying on implicit allreduce in
            optimizer.apply_gradients(). Default is False. For now, if
            training using FP16 mixed precision, explicit allreduce will
            aggregate gradients in FP16 format. For TPU and GPU training
            using FP32, explicit allreduce will aggregate gradients in FP32
            format.
        pre_allreduce_callbacks: A list of callback functions that takes
            gradients and model variables pairs as input, manipulate them,
            and returns a new gradients and model variables pairs. The
            callback functions will be invoked in the list order and before
            gradients are allreduced. With mixed precision training, the
            pre_allreduce_callbacks will be applied on scaled_gradients.
            Default is no callbacks. Only used when explicit_allreduce=True.
        post_allreduce_callbacks: A list of callback functions that takes
            gradients and model variables pairs as input, manipulate them,
            and returns a new gradients and model variables pairs. The
            callback functions will be invoked in the list order and right
            before gradients are applied to variables for updates. Default is
            no callbacks. Only used when explicit_allreduce=True.
        train_summary_interval: Step interval for training summaries. If the
            value is a negative number, then training summaries are not
            enabled. Training summaries are also only written when
            `steps_per_loop` is at least `_MIN_SUMMARY_STEPS`.

    Returns:
        Trained model.

    Raises:
        ValueError: (1) When model returned by `model_fn` does not have
            optimizer attribute or when required parameters are set to none.
            (2) eval args are not specified correctly. (3) metric_fn must be
            a callable if specified. (4) sub_model_export_name is specified,
            but `sub_model` returned by `model_fn` is None. (5) `run_eagerly`
            is requested together with a TPUStrategy. (6) positional
            arguments are used.
        AssertionError: When TensorFlow is not executing eagerly.
    """

    if _sentinel is not None:
        raise ValueError('only call `run_customized_training_loop()` '
                         'with named arguments.')

    required_arguments = [
        strategy, model_fn, loss_fn, model_dir, steps_per_epoch, train_input_fn
    ]
    if any(arg is None for arg in required_arguments):
        raise ValueError('`strategy`, `model_fn`, `loss_fn`, `model_dir`, '
                         '`steps_per_epoch` and `train_input_fn` are required '
                         'parameters.')
    if not steps_per_loop:
        if tf.config.list_logical_devices('TPU'):
            # One can't fully utilize a TPU with steps_per_loop=1, so in this case
            # default users to a more useful value.
            steps_per_loop = min(1000, steps_per_epoch)
        else:
            steps_per_loop = 1
        logging.info('steps_per_loop not specified. Using steps_per_loop=%d',
                     steps_per_loop)
    if steps_per_loop > steps_per_epoch:
        logging.warning(
            'steps_per_loop: %d is specified to be greater than '
            ' steps_per_epoch: %d, we will use steps_per_epoch as'
            ' steps_per_loop.', steps_per_loop, steps_per_epoch)
        steps_per_loop = steps_per_epoch
    # The host loop below calls `.numpy()` on the optimizer step counter, so
    # the outer context has to be eager.
    assert tf.executing_eagerly()

    if run_eagerly and isinstance(strategy,
                                  tf.distribute.experimental.TPUStrategy):
        raise ValueError(
            'TPUStrategy should not run eagerly as it heavily relies on graph'
            ' optimization for the distributed system.')

    if eval_input_fn and (eval_steps is None or metric_fn is None):
        raise ValueError(
            '`eval_steps` and `metric_fn` are required when `eval_input_fn` '
            'is not none.')
    if metric_fn and not callable(metric_fn):
        raise ValueError(
            'if `metric_fn` is specified, metric_fn must be a callable.')

    callback_list = tf.keras.callbacks.CallbackList(custom_callbacks)

    total_training_steps = steps_per_epoch * epochs
    train_iterator = _get_input_iterator(train_input_fn, strategy)
    # NOTE(review): the evaluation loss metric carries the same name as the
    # training one ('training_loss'); presumably so both curves share a
    # summary tag -- confirm before renaming.
    eval_loss_metric = tf.keras.metrics.Mean('training_loss', dtype=tf.float32)

    with distribution_utils.get_strategy_scope(strategy):
        # To correctly place the model weights on accelerators,
        # model and optimizer should be created in scope.
        model, sub_model = model_fn()
        if not hasattr(model, 'optimizer'):
            raise ValueError('User should set optimizer attribute to model '
                             'inside `model_fn`.')
        if sub_model_export_name and sub_model is None:
            raise ValueError('sub_model_export_name is specified as %s, but '
                             'sub_model is None.' % sub_model_export_name)

        optimizer = model.optimizer

        if init_checkpoint:
            logging.info(
                'Checkpoint file %s found and restoring from '
                'initial checkpoint for core model.', init_checkpoint)
            checkpoint = tf.train.Checkpoint(model=sub_model)
            checkpoint.restore(
                init_checkpoint).assert_existing_objects_matched()
            logging.info('Loading from checkpoint file completed')

        train_loss_metric = tf.keras.metrics.Mean('training_loss',
                                                  dtype=tf.float32)
        eval_metrics = [metric_fn()] if metric_fn else []
        # If evaluation is required, make a copy of metric as it will be used by
        # both train and evaluation.
        train_metrics = [
            metric.__class__.from_config(metric.get_config())
            for metric in eval_metrics
        ]

        # Create summary writers
        if _should_export_summary(strategy):
            summary_dir = os.path.join(model_dir, 'summaries')
        else:
            # In multi worker training we need every worker to write summary, because
            # variables can trigger synchronization on read and synchronization needs
            # all workers to participate.
            summary_dir = tempfile.mkdtemp()
        eval_summary_writer = tf.summary.create_file_writer(
            os.path.join(summary_dir, 'eval'))
        last_summary_step = 0
        if steps_per_loop >= _MIN_SUMMARY_STEPS and train_summary_interval >= 0:
            # Only writes summary when the stats are collected sufficiently over
            # enough steps.
            train_summary_writer = tf.summary.create_file_writer(
                os.path.join(summary_dir, 'train'))
        else:
            train_summary_writer = tf.summary.create_noop_writer()

        # Collects training variables.
        training_vars = model.trainable_variables

        def _replicated_step(inputs):
            """Replicated training step."""

            inputs, labels = inputs
            with tf.GradientTape() as tape:
                model_outputs = model(inputs, training=True)
                loss = loss_fn(labels, model_outputs)
                # Raw loss is used for reporting in metrics/logs.
                raw_loss = loss
                if scale_loss:
                    # Scales down the loss for gradients to be invariant from replicas.
                    loss = loss / strategy.num_replicas_in_sync

            if explicit_allreduce:
                grad_utils.minimize_using_explicit_allreduce(
                    tape, optimizer, loss, training_vars,
                    pre_allreduce_callbacks, post_allreduce_callbacks)
            else:
                if isinstance(
                        optimizer, tf.keras.mixed_precision.experimental.
                        LossScaleOptimizer):
                    # Re-enter the tape so the loss scaling itself is recorded.
                    with tape:
                        scaled_loss = optimizer.get_scaled_loss(loss)
                    scaled_grads = tape.gradient(scaled_loss, training_vars)
                    grads = optimizer.get_unscaled_gradients(scaled_grads)
                else:
                    grads = tape.gradient(loss, training_vars)
                optimizer.apply_gradients(zip(grads, training_vars))
            # For reporting, the metric takes the mean of losses.
            train_loss_metric.update_state(raw_loss)
            for metric in train_metrics:
                metric.update_state(labels, model_outputs)

        @tf.function
        def train_steps(iterator, steps):
            """Performs distributed training steps in a loop.

            Args:
                iterator: the distributed iterator of training datasets.
                steps: a tf.int32 integer tensor to specify number of steps
                    to run inside host training loop.

            Raises:
                ValueError: Any of the arguments or tensor shapes are invalid.
            """
            if not isinstance(steps, tf.Tensor):
                raise ValueError(
                    'steps should be an Tensor. Python object may cause '
                    'retracing.')

            for _ in tf.range(steps):
                strategy.run(_replicated_step, args=(next(iterator), ))

        def train_single_step(iterator):
            """Performs a distributed training step.

            Args:
                iterator: the distributed iterator of training datasets.

            Raises:
                ValueError: Any of the arguments or tensor shapes are invalid.
            """
            strategy.run(_replicated_step, args=(next(iterator), ))

        def test_step(iterator):
            """Calculates evaluation metrics on distributed devices."""
            def _test_step_fn(inputs):
                """Replicated accuracy calculation."""

                inputs, labels = inputs
                model_outputs = model(inputs, training=False)
                for metric in eval_metrics:
                    metric.update_state(labels, model_outputs)
                return model_outputs, labels

            outputs, labels = strategy.run(_test_step_fn,
                                           args=(next(iterator), ))
            outputs = tf.nest.map_structure(
                strategy.experimental_local_results, outputs)
            labels = tf.nest.map_structure(strategy.experimental_local_results,
                                           labels)
            return outputs, labels

        # Unless pure eager execution was requested, compile the per-step
        # functions into graphs.
        if not run_eagerly:
            train_single_step = tf.function(train_single_step)
            test_step = tf.function(test_step)

        def _run_evaluation(current_training_step, test_iterator):
            """Runs validation steps and aggregate metrics.

            Args:
                current_training_step: tf.int32 tensor containing the current
                    step.
                test_iterator: distributed iterator of test datasets.

            Returns:
                A dict of metric names and values.
            """
            # The last batch of the evaluation is often smaller than previous ones.
            # Moreover, in some distributed pieces it might even be empty. Therefore,
            # different from the way training_loss is calculated, it is needed to
            # gather all the logits and labels here to calculate the evaluation loss
            # outside.
            loss_list, loss_weights = [], []
            for _ in range(eval_steps):
                outputs, labels = test_step(test_iterator)
                for cur_logits, cur_labels in zip(outputs, labels):
                    # This is to handle cases when cur_labels is not a single tensor,
                    # but a dict of tensors.
                    cur_weight = tf.shape(tf.nest.flatten(cur_labels)[0])[0]
                    if cur_weight != 0:
                        loss_list.append(
                            loss_fn(cur_labels, cur_logits).numpy())
                        loss_weights.append(cur_weight)
            # The sample_weights are the actual number of examples in each batch,
            # a summation of numbers of examples in each replica if using
            # distributed training.
            eval_loss_metric.update_state(loss_list,
                                          sample_weight=loss_weights)

            logs = {}
            with eval_summary_writer.as_default():
                for metric in [eval_loss_metric
                               ] + eval_metrics + model.metrics:
                    metric_value = _float_metric_value(metric)
                    logs[metric.name] = metric_value
                    logging.info('Step: [%d] Validation %s = %f',
                                 current_training_step, metric.name,
                                 metric_value)
                    tf.summary.scalar(metric.name,
                                      metric_value,
                                      step=current_training_step)
                eval_summary_writer.flush()

            return logs

        # Training loop starts here.
        checkpoint = tf.train.Checkpoint(model=model,
                                         optimizer=optimizer,
                                         global_step=optimizer.iterations)
        sub_model_checkpoint = tf.train.Checkpoint(
            model=sub_model, global_step=optimizer.iterations
        ) if sub_model_export_name else None

        latest_checkpoint_file = tf.train.latest_checkpoint(model_dir)
        if latest_checkpoint_file:
            logging.info(
                'Checkpoint file %s found and restoring from '
                'checkpoint', latest_checkpoint_file)
            checkpoint.restore(latest_checkpoint_file)
            logging.info('Loading from checkpoint file completed')

        # Resume from the optimizer's own step counter, which a restored
        # checkpoint may have advanced.
        current_step = optimizer.iterations.numpy()
        checkpoint_name = 'ctl_step_{step}.ckpt'

        while current_step < total_training_steps:
            if current_step % steps_per_epoch == 0:
                callback_list.on_epoch_begin(
                    int(current_step / steps_per_epoch) + 1)

            # Training loss/metrics are averaged over the steps inside the
            # micro training loop, so reset their values before each round.
            train_loss_metric.reset_states()
            for metric in train_metrics + model.metrics:
                metric.reset_states()

            callback_list.on_batch_begin(current_step)
            # Runs several steps in the host while loop.
            steps = steps_to_run(current_step, steps_per_epoch, steps_per_loop)

            if tf.config.list_physical_devices('GPU'):
                # TODO(zongweiz): merge with train_steps once tf.while_loop
                # GPU performance bugs are fixed.
                for _ in range(steps):
                    train_single_step(train_iterator)
            else:
                # Converts steps to a Tensor to avoid tf.function retracing.
                train_steps(train_iterator,
                            tf.convert_to_tensor(steps, dtype=tf.int32))
            train_loss = _float_metric_value(train_loss_metric)
            current_step += steps
            callback_list.on_batch_end(current_step - 1, {'loss': train_loss})

            # Updates training logging.
            training_status = 'Train Step: %d/%d  / loss = %s' % (
                current_step, total_training_steps, train_loss)

            # Only write to the real training writer once the configured
            # interval has elapsed; otherwise route summaries to a no-op.
            if current_step >= last_summary_step + train_summary_interval:
                summary_writer = train_summary_writer
                last_summary_step = current_step
            else:
                summary_writer = tf.summary.create_noop_writer()

            with summary_writer.as_default():
                tf.summary.scalar(train_loss_metric.name,
                                  train_loss,
                                  step=current_step)
                for metric in train_metrics + model.metrics:
                    metric_value = _float_metric_value(metric)
                    training_status += '  %s = %f' % (metric.name,
                                                      metric_value)
                    tf.summary.scalar(metric.name,
                                      metric_value,
                                      step=current_step)
                summary_writer.flush()
            logging.info(training_status)

            if current_step % steps_per_epoch == 0:
                # Save a submodel with the step in the file name after each epoch.
                if sub_model_export_name:
                    _save_checkpoint(
                        strategy, sub_model_checkpoint, model_dir,
                        '%s_step_%d.ckpt' %
                        (sub_model_export_name, current_step))

                # Save model checkpoints and run validation steps after each epoch
                # (with the exception of the final epoch which is handled after the
                # training loop).
                if current_step < total_training_steps:
                    _save_checkpoint(strategy, checkpoint, model_dir,
                                     checkpoint_name.format(step=current_step))
                    logs = None
                    if eval_input_fn:
                        logging.info('Running evaluation after step: %s.',
                                     current_step)
                        logs = _run_evaluation(
                            current_step,
                            _get_input_iterator(eval_input_fn, strategy))
                        # Re-initialize evaluation metric.
                        eval_loss_metric.reset_states()
                        for metric in eval_metrics + model.metrics:
                            metric.reset_states()

                    callback_list.on_epoch_end(
                        int(current_step / steps_per_epoch), logs)

        if sub_model_export_name:
            _save_checkpoint(strategy, sub_model_checkpoint, model_dir,
                             '%s.ckpt' % sub_model_export_name)

        _save_checkpoint(strategy, checkpoint, model_dir,
                         checkpoint_name.format(step=current_step))
        logs = None
        if eval_input_fn:
            logging.info(
                'Running final evaluation after training is complete.')
            logs = _run_evaluation(
                current_step, _get_input_iterator(eval_input_fn, strategy))

        callback_list.on_epoch_end(int(current_step / steps_per_epoch), logs)

        training_summary = {
            'total_training_steps': total_training_steps,
            'train_loss': _float_metric_value(train_loss_metric),
        }
        for metric in model.metrics:
            training_summary[metric.name] = _float_metric_value(metric)
        if eval_metrics:
            # TODO(hongkuny): Cleans up summary reporting in text.
            training_summary['last_train_metrics'] = _float_metric_value(
                train_metrics[0])
            training_summary['eval_metrics'] = _float_metric_value(
                eval_metrics[0])

        write_txt_summary(training_summary, summary_dir)

        # The temporary summary directory created for non-exporting workers
        # is discarded once training has finished.
        if not _should_export_summary(strategy):
            tf.io.gfile.rmtree(summary_dir)

        return model
Beispiel #43
0
#%%
import tensorflow as tf
from tensorflow import keras

import numpy as np
import matplotlib.pyplot as plt

# Report the TensorFlow version, whether eager execution is active, and
# whether any physical GPU is visible.
print(tf.__version__)
eager_enabled = tf.executing_eagerly()
print("즉시 실행 모드: ", eager_enabled)
gpu_devices = tf.config.experimental.list_physical_devices("GPU")
gpu_status = "사용 가능" if gpu_devices else "사용 불가능"
print("GPU ", gpu_status)

# Alternative dataset: keras.datasets.mnist
#%% load data
fashion_mnist = keras.datasets.fashion_mnist
train_split, test_split = fashion_mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split
# To inspect one sample: print(train_images[0], train_labels[0])
print(train_images.shape)
Beispiel #44
0
    def train(self, check_prior, **name_to_observation_or_conditioning):
        """Build the training loss from observations and conditioning inputs.

        The keyword arguments are split into observed variables and
        conditioning inputs by `_split_observations_and_conditioning`. For
        every assignment yielded by `_generate_integrated_values`, the
        variational and then the conditioned generative are built; the
        resulting log-probabilities (and any losses registered through the
        `add_loss` callback) are weighted by the variational probability of
        that assignment and accumulated into the final loss
        `-(log P(x) + beta * (log P(z) - log Q(z))) + additional_loss`.

        Args:
            check_prior: When true, the generative is first built in
                `UNCONDITIONED` mode and assertions verify that every
                observed, integrated and variational variable exists in it.
            **name_to_observation_or_conditioning: Named observed values and
                conditioning values, mixed together.

        Returns:
            `loss` when `check_prior` is false; otherwise the tuple
            `(loss, unconditioned_generative)`, whose second element is
            whatever `self.generative` returned in `UNCONDITIONED` mode.

        Raises:
            AssertionError: When a consistency check between the generative,
                the variational and the observations fails.
        """
        split = self._split_observations_and_conditioning(
            name_to_observation_or_conditioning)
        observations, conditioning = split

        if check_prior:
            # The 'prior' is the generative with no variational
            # substitutions; every rv is simply sampled and remembered.
            prior_rvs = {}

            def sample_and_record_prior(rv, name):
                assert name not in prior_rvs, (
                    'duplicate variable {} in unconditioned generative'
                    .format(name))
                prior_rvs[name] = rv
                return rv.sample()

            def reject_prior_loss(_):
                # In theory this could just add the loss, but that would be
                # inconsistent with every other add_loss call, which is
                # integrated over.
                assert False, 'adding a loss in the unconditioned generative is not supported'

            with tf.name_scope('generative/unconditioned'):
                unconditioned_generative = self.generative(
                    _RvBuilder(sample_and_record_prior),
                    GenerativeMode.UNCONDITIONED,
                    reject_prior_loss,
                    **conditioning)

            for name in observations:
                assert name in prior_rvs, (
                    'observed variable {} not present in generative'
                    .format(name))

            for name in self.integrated_name_to_values:
                assert name in prior_rvs, (
                    'integrated variable {} not present in generative'
                    .format(name))
                assert name not in observations, (
                    'integrated variable {} may not also be observed'
                    .format(name))

        if tf.executing_eagerly():
            # One seed is drawn up front and re-applied for every integrated
            # assignment below.
            random_seed = int(
                tf.random.uniform([], 0, tf.int32.max, dtype=tf.int32))
        elif len(self.integrated_name_to_values) > 0:
            print(
                'warning: IntegratedEagerKlqp does not reuse randomness correctly in graph mode!'
            )

        def mean_over_nonbatch_axes(x):
            # Rank should never be zero; ideally it is always one, if
            # batch-vs-event indexing of RVs is correct.
            rank = len(x.shape)
            if rank < 2:
                return x
            return tf.reduce_mean(x, axis=tuple(range(1, rank)))

        weighted_log_Px_sum = 0.
        weighted_log_Pz_sum = 0.
        weighted_log_Qz_sum = 0.
        weighted_extra_loss_sum = 0.
        for integrated_values in self._generate_integrated_values():

            # Use the same values for all random variables (up to
            # conditioning on the integrated variables) for each value of the
            # integrated-over variables in the summation. This relies on
            # actually being in eager mode.
            if tf.executing_eagerly():
                tf.random.set_random_seed(random_seed)

            extra_losses = []

            def collect_loss(loss):
                extra_losses.append(loss)

            # Variational pass: also uses variational samples for ancestral
            # substitutions.
            substituted_values = dict(observations)
            variational_rvs = {}

            def bind_variational(rv, name):
                if check_prior:
                    assert name in prior_rvs, (
                        'variational rv {} not present in generative'
                        .format(name))
                assert name not in variational_rvs, (
                    '{} already has variational binding'.format(name))
                assert name not in observations, (
                    '{} may not be given by variational, as it is observed'
                    .format(name))
                variational_rvs[name] = rv
                value = (integrated_values[name]
                         if name in integrated_values else rv.sample())
                substituted_values[name] = value
                return value

            with tf.name_scope('variational/conditioned'):
                self.variational(_RvBuilder(bind_variational), collect_loss,
                                 **observations, **conditioning)

            # Conditioned generative pass: values come from the variational
            # and the observations.
            generative_rvs = {}

            def bind_generative(rv, name):
                assert name not in generative_rvs, (
                    'duplicate variable {} in conditioned generative'
                    .format(name))
                if name not in substituted_values:
                    assert name not in integrated_values, (
                        'variable {} is integrated over, but has no variational distribution; this case is not supported'
                        .format(name))
                    print(
                        'warning: variable {} has neither variational distribution nor observed value, hence will be marginalised by sampling'
                        .format(name))
                    substituted_values[name] = rv.sample()
                generative_rvs[name] = rv
                return substituted_values[name]

            with tf.name_scope('generative/conditioned'):
                self.generative(_RvBuilder(bind_generative),
                                GenerativeMode.CONDITIONED, collect_loss,
                                **conditioning)

            def substituted_log_prob(name_to_rv, name):
                # Per-batch-element log-probability of the substituted value.
                return mean_over_nonbatch_axes(
                    name_to_rv[name].log_prob(substituted_values[name]))

            log_Px = sum(
                substituted_log_prob(generative_rvs, name)
                for name in observations)
            # Iterate the variational (not generative) rvs so that only
            # variables with variational (not prior) substitutions count;
            # observed ones belong to P(x) instead.
            log_Pz = sum(
                substituted_log_prob(generative_rvs, name)
                for name in variational_rvs
                if name not in observations)
            log_Qz = sum(
                substituted_log_prob(variational_rvs, name)
                for name in variational_rvs)

            integrated_log_probs = [
                mean_over_nonbatch_axes(variational_rvs[name].log_prob(value))
                for name, value in integrated_values.items()
            ]
            Q_integrated_values = tf.exp(
                sum(integrated_log_probs, 0.))  # :: iib

            additional_loss = sum(extra_losses)

            weighted_log_Px_sum += tf.reduce_mean(
                Q_integrated_values * log_Px)
            weighted_log_Pz_sum += tf.reduce_mean(
                Q_integrated_values * log_Pz)
            weighted_log_Qz_sum += tf.reduce_mean(
                Q_integrated_values * log_Qz)
            weighted_extra_loss_sum += tf.reduce_mean(
                Q_integrated_values * additional_loss)

        # `beta` may be either a constant or a zero-argument callable.
        if callable(self.beta):
            beta = self.beta()
        else:
            beta = self.beta
        loss = -(
            weighted_log_Px_sum
            + beta * (weighted_log_Pz_sum - weighted_log_Qz_sum)
        ) + weighted_extra_loss_sum  # :: iib

        if self.verbose:
            if tf.executing_eagerly():
                # If we can, print with nice formatting (two decimal places).
                report = 'log P(x) = {:.2f}, beta * KL= {:.2f} (log P(z) = {:.2f}, log Q(z) = {:.2f}), L* = {:.2f}, total loss = {:.2f}'
                print(
                    report.format(
                        weighted_log_Px_sum,
                        beta * (weighted_log_Pz_sum - weighted_log_Qz_sum),
                        weighted_log_Pz_sum,
                        weighted_log_Qz_sum,
                        weighted_extra_loss_sum,
                        loss))
            else:
                tf.print(
                    'log P(x) = ',
                    weighted_log_Px_sum,
                    ', beta * KL= ',
                    beta * (weighted_log_Pz_sum - weighted_log_Qz_sum),
                    ' (log P(z) = ',
                    weighted_log_Pz_sum,
                    ', log Q(z) = ',
                    weighted_log_Qz_sum,
                    '), L* = ',
                    weighted_extra_loss_sum,
                    ', total loss = ',
                    loss,
                    sep='')

        if not check_prior:
            return loss
        return loss, unconditioned_generative
    def loss_gradient(self, x: np.ndarray, y: np.ndarray, training_mode: bool = False, **kwargs) -> np.ndarray:
        """
        Return the gradient of the loss function with respect to `x`.

        With a truthy `sampling` keyword argument, every input is repeated `self.sample_size` times, perturbed with
        Gaussian noise of standard deviation `self.scale`, and the loss is computed on the softmax outputs averaged
        over those noisy copies. Otherwise the call is delegated to `TensorFlowV2Classifier.loss_gradient`.

        :param x: Sample input with shape as expected by the model.
        :param y: Correct labels, one-vs-rest encoding.
        :param training_mode: `True` for model set to training mode and `False` for model set to evaluation mode.
        :param sampling: True if loss gradients should be determined with Monte Carlo sampling.
        :type sampling: `bool`
        :return: Array of gradients of the same shape as `x`.
        :raises ValueError: If sampling is requested while TensorFlow is not executing eagerly.
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        if not kwargs.get("sampling"):
            # No sampling requested: use the plain classifier gradient.
            return TensorFlowV2Classifier.loss_gradient(self, x=x, y=y, training_mode=training_mode, **kwargs)

        # Apply preprocessing
        x_preprocessed, _ = self._apply_preprocessing(x, y, fit=False)

        if not tf.executing_eagerly():  # pragma: no cover
            raise ValueError("Expecting eager execution.")

        with tf.GradientTape() as tape:
            inputs_t = tf.convert_to_tensor(x_preprocessed)
            tape.watch(inputs_t)

            # One noisy copy of every input per Monte Carlo sample.
            repeated_t = tf.repeat(inputs_t, repeats=self.sample_size, axis=0)
            noisy_t = repeated_t + tf.random.normal(
                shape=repeated_t.shape,
                mean=0.0,
                stddev=self.scale,
                dtype=repeated_t.dtype,
                seed=None,
                name=None,
            )
            if self.clip_values is not None:
                lower, upper = self.clip_values[0], self.clip_values[1]
                noisy_t = tf.clip_by_value(noisy_t, clip_value_min=lower, clip_value_max=upper, name=None)

            outputs = self._model(noisy_t, training=training_mode)
            probabilities = tf.nn.softmax(outputs, axis=1, name=None)

            # Group the noisy copies of each input together and average their probabilities.
            grouped = tf.reshape(probabilities, shape=(-1, self.sample_size, outputs.shape[-1]))
            mean_probabilities = tf.reduce_mean(grouped, axis=1)

            per_sample_loss = tf.keras.losses.categorical_crossentropy(
                y_true=y, y_pred=mean_probabilities, from_logits=False, label_smoothing=0
            )
            loss = tf.reduce_mean(per_sample_loss)

        raw_gradients = tape.gradient(loss, inputs_t).numpy()

        # Apply preprocessing gradients
        return self._apply_preprocessing_gradient(x, raw_gradients)
Beispiel #46
0
  def test_sync_replicas(self, create_gan_model_fn, create_global_step):
    """Checks `tfgan.gan_train_ops` when both optimizers are sync optimizers.

    Args:
      create_gan_model_fn: Callable taking no arguments that returns the model
        handed to `tfgan.gan_loss` and `tfgan.gan_train_ops`.
      create_global_step: If truthy, a custom int32 global-step variable is
        created and registered in the GLOBAL_STEP collection beforehand.
    """
    if tf.executing_eagerly():
      # None of the usual utilities work in eager.
      return
    model = create_gan_model_fn()
    loss = tfgan.gan_loss(model)
    # Remember the variable count so we can verify nothing trainable is added.
    num_trainable_vars = len(get_trainable_variables())

    if create_global_step:
      gstep = tf.compat.v1.get_variable(
          'custom_gstep',
          dtype=tf.int32,
          initializer=0,
          trainable=False)
      tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.GLOBAL_STEP, gstep)

    g_opt = get_sync_optimizer()
    d_opt = get_sync_optimizer()
    train_ops = tfgan.gan_train_ops(
        model, loss, generator_optimizer=g_opt, discriminator_optimizer=d_opt)
    self.assertIsInstance(train_ops, tfgan.GANTrainOps)
    # No new trainable variables should have been added.
    self.assertLen(get_trainable_variables(), num_trainable_vars)

    # Sync hooks should be populated in the GANTrainOps.
    self.assertLen(train_ops.train_hooks, 2)
    for hook in train_ops.train_hooks:
      self.assertIsInstance(hook, get_sync_optimizer_hook_type())
    # Each hook must wrap one of the two optimizers created above.
    sync_opts = [hook._sync_optimizer for hook in train_ops.train_hooks]
    self.assertSetEqual(frozenset(sync_opts), frozenset((g_opt, d_opt)))

    g_sync_init_op = g_opt.get_init_tokens_op(num_tokens=1)
    d_sync_init_op = d_opt.get_init_tokens_op(num_tokens=1)

    # Check that update op is run properly.
    global_step = tf.compat.v1.train.get_or_create_global_step()
    with self.cached_session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      sess.run(tf.compat.v1.local_variables_initializer())

      sess.run(g_opt.chief_init_op)
      sess.run(d_opt.chief_init_op)

      gstep_before = sess.run(global_step)

      # Start required queue runner for SyncReplicasOptimizer.
      coord = tf.train.Coordinator()
      g_threads = g_opt.get_chief_queue_runner().create_threads(sess, coord)
      d_threads = d_opt.get_chief_queue_runner().create_threads(sess, coord)

      sess.run(g_sync_init_op)
      sess.run(d_sync_init_op)

      sess.run(train_ops.generator_train_op)
      # Check that global step wasn't incremented.
      self.assertEqual(gstep_before, sess.run(global_step))

      sess.run(train_ops.discriminator_train_op)
      # Check that global step wasn't incremented.
      self.assertEqual(gstep_before, sess.run(global_step))

      # Shut the queue-runner threads down before leaving the session.
      coord.request_stop()
      coord.join(g_threads + d_threads)
  def testMatrixEvent(self):
    """Checks shapes, `log_prob` and `prob` for a matrix-valued event.

    The distribution under test is built twice: once with batch/event shapes
    supplied through placeholders of unknown static shape, and once with plain
    Python lists. Both variants must report the same results.
    """
    batch_shape = [2]
    event_shape = [2, 3, 3]
    scale = 2.
    loc = 0.

    batch_shape_pl = tf.compat.v1.placeholder_with_default(
        input=np.int32(batch_shape), shape=None, name="dynamic_batch_shape")
    event_shape_pl = tf.compat.v1.placeholder_with_default(
        input=np.int32(event_shape), shape=None, name="dynamic_event_shape")

    def make_distribution(batch, event):
      """Builds the distribution under test for the given shapes."""
      return self._cls()(
          distribution=tfd.Normal(loc=loc, scale=scale),
          bijector=DummyMatrixTransform(),
          batch_shape=batch,
          event_shape=event,
          validate_args=True)

    dynamic_dist = make_distribution(batch_shape_pl, event_shape_pl)
    static_dist = make_distribution(batch_shape, event_shape)

    def expected_log_prob(values):
      # Normal log-density summed over the three event dimensions, plus a
      # Jacobian term given by the determinant of each trailing matrix.
      log_density = stats.norm(loc, scale).logpdf(values)
      jacobian_term = np.sum(np.linalg.det(values), axis=-1)
      return np.sum(log_density, axis=(-1, -2, -3)) + jacobian_term

    self.assertAllEqual([2, 3, 3], static_dist.event_shape)
    self.assertAllEqual([2], static_dist.batch_shape)

    if not tf.executing_eagerly():
      self.assertAllEqual(tf.TensorShape(None), dynamic_dist.event_shape)
      self.assertAllEqual(tf.TensorShape(None), dynamic_dist.batch_shape)

    num_samples = int(5e3)
    for dist in (static_dist, dynamic_dist):
      # Ensure sample works by checking first, second moments.
      samples = dist.sample(num_samples, seed=0)
      head = samples[:5, ...]
      fetches = [
          head,
          dist.event_shape_tensor(),
          dist.batch_shape_tensor(),
          dist.log_prob(head),
          dist.prob(head),
      ]
      head_, event_shape_, batch_shape_, log_prob_, prob_ = self.evaluate(
          fetches)

      # Ensure all other functions work as intended.
      self.assertAllEqual([5, 2, 2, 3, 3], head_.shape)
      self.assertAllEqual([2, 3, 3], event_shape_)
      self.assertAllEqual([2], batch_shape_)
      self.assertAllClose(
          expected_log_prob(head_), log_prob_, atol=0., rtol=1e-6)
      self.assertAllClose(
          np.exp(expected_log_prob(head_)), prob_, atol=0., rtol=1e-5)
Beispiel #48
0
 def test_output_type(self, create_fn, expected_tuple_type):
   """Verify that `create_fn()` yields an instance of `expected_tuple_type`."""
   # None of the usual utilities work in eager, so only check in graph mode.
   if not tf.executing_eagerly():
     created = create_fn()
     self.assertIsInstance(created, expected_tuple_type)
Beispiel #49
0
 def test_eval_count(self):
     """Check the reported evaluation count in the active execution mode."""
     # Pick the variant that matches eager vs. graph execution, then run it.
     run_check = (self._test_eval_count_eager
                  if tf.executing_eagerly() else self._test_eval_count_graph)
     run_check()
          input_filenames_or_glob,
          sample_rate,
          FLAGS.debug,
          FLAGS.embedding_names,
          FLAGS.embedding_modules,
          FLAGS.module_output_keys,
          FLAGS.audio_key,
          FLAGS.sample_rate_key,
          FLAGS.label_key,
          FLAGS.speaker_id_key,
          FLAGS.average_over_time,
          FLAGS.delete_audio_from_output,
          output_filename,
          input_format=input_format,
          output_format=output_format,
          suffix=i)


if __name__ == '__main__':
  # Flags that must always be given on the command line.
  flags.mark_flags_as_required([
      'output_filename',
      'embedding_names',
      'embedding_modules',
      'module_output_keys',
      'audio_key',
      'label_key',
  ])
  # For each group below, exactly one of the listed flags must be set.
  for exclusive_group in (
      ['input_glob', 'tfds_dataset'],
      ['tfds_dataset', 'sample_rate_key', 'sample_rate'],
  ):
    flags.mark_flags_as_mutual_exclusive(exclusive_group, required=True)
  # The program expects TF2 behavior with eager execution enabled.
  tf.compat.v2.enable_v2_behavior()
  assert tf.executing_eagerly()
  app.run(main)
Beispiel #51
0
import sys
import unittest
import tensorflow as tf

import tfdiffeq

sys.path.insert(0, '..')
from tests.problems import construct_problem
from tests.check_grad import gradcheck

# Switch on TF2 behavior when eager execution is not already active.
if not tf.executing_eagerly():
    tf.enable_v2_behavior()

# NOTE(review): not used in the visible part of this file; presumably a
# numerical tolerance for the tests - confirm against the rest of the module.
eps = 1e-5

# torch.set_default_dtype(torch.float64)
# Run on the first GPU when TensorFlow reports one, otherwise on the CPU.
TEST_DEVICE = "gpu:0" if tf.test.is_gpu_available() else "cpu:0"


def max_abs(tensor):
    """Return the largest absolute value contained in `tensor`."""
    magnitudes = tf.abs(tensor)
    return tf.reduce_max(magnitudes)


class TestCollectionState(unittest.TestCase):
    """Tests for integrating a state made of a collection (tuple) of tensors."""

    def test_dopri5(self):
        """Integrate a duplicated tuple state with the 'dopri5' method."""
        f, y0, t_points, sol = construct_problem(TEST_DEVICE)

        def tuple_f(t, y):
            # Apply the scalar problem's dynamics to each tuple component.
            return f(t, y[0]), f(t, y[1])

        tuple_y0 = (y0, y0)

        tuple_y = tfdiffeq.odeint(tuple_f, tuple_y0, t_points, method='dopri5')
def _restore_optimizer_vars(sess, var_list, chkpt_path):
    """Restore `var_list` from the latest checkpoint in `chkpt_path`'s directory.

    Uses the eager saver when executing eagerly, otherwise a graph-mode saver
    bound to `sess`.
    """
    chkpt_dir = os.path.dirname(chkpt_path)
    if tf.executing_eagerly():
        saver = tf.contrib.eager.Saver(var_list=var_list)
        saver.restore(file_prefix=tf.train.latest_checkpoint(chkpt_dir))
    else:
        saver = tf.train.Saver(var_list=var_list)
        saver.restore(sess=sess, save_path=tf.train.latest_checkpoint(chkpt_dir))


def restore_models_and_optimizers_and_alpha(sess, gen_model, dis_model, mapping_network, sampling_model,
                                            optimizer_g, optimizer_d, optimizer_m, save_paths):
    """
    Load model weights and optimizer state from disk and read back alpha and step.

    Every model/optimizer argument may be None, in which case it is skipped.

    :param sess: session if in graph mode, otherwise unused
    :param gen_model: generator with defined variables
    :param dis_model: discriminator with defined variables
    :param mapping_network: mapping network with defined variables
    :param sampling_model: sampling model with defined variables
    :param optimizer_g: generator's optimizer
    :param optimizer_d: discriminator's optimizer
    :param optimizer_m: mapping network's optimizer; only restored when `mapping_network`
        is given and the directory of `save_paths.mn_optim` exists
    :param save_paths: paths containing models, optimizers, step, and alpha on disk
    :return: tuple `(alpha, step)`; an entry is None when its path in `save_paths` is None
    """
    models_and_paths = (
        (gen_model, "gen_model"),
        (dis_model, "dis_model"),
        (mapping_network, "mapping_network"),
        (sampling_model, "sampling_model"),
    )
    for model, path_attr in models_and_paths:
        if model is not None:
            model.load_weights(getattr(save_paths, path_attr), by_name=True)

    # A single condition now guards both filtering and restoring the mapping
    # network optimizer. Previously the eager branch omitted the
    # `mapping_network` check and could reference an unassigned `vars_mn`.
    restore_mn_optim = (optimizer_m is not None
                        and mapping_network is not None
                        and os.path.exists(os.path.dirname(save_paths.mn_optim)))

    # Filter every variable list first, then restore (same order as before).
    vars_g = vars_d = vars_mn = None
    if optimizer_g is not None:
        vars_g = filter_vars_with_checkpoint(chkpt_path=save_paths.gen_optim,
                                             var_list=optimizer_g.variables())
    if optimizer_d is not None:
        vars_d = filter_vars_with_checkpoint(chkpt_path=save_paths.dis_optim,
                                             var_list=optimizer_d.variables())
    if restore_mn_optim:
        vars_mn = filter_vars_with_checkpoint(chkpt_path=save_paths.mn_optim,
                                              var_list=optimizer_m.variables())

    if optimizer_d is not None:
        _restore_optimizer_vars(sess, vars_d, save_paths.dis_optim)
    if optimizer_g is not None:
        _restore_optimizer_vars(sess, vars_g, save_paths.gen_optim)
    if restore_mn_optim:
        _restore_optimizer_vars(sess, vars_mn, save_paths.mn_optim)

    step = None
    alpha = None
    if save_paths.step is not None:
        with open(save_paths.step, "r") as f:
            step = int(f.read())
    if save_paths.alpha is not None:
        with open(save_paths.alpha, "r") as f:
            alpha = float(f.read())
    return alpha, step
Beispiel #53
0
 def initialize(self):
     """Return a no-op when executing eagerly, otherwise `self._initializers`."""
     return tf.no_op() if tf.executing_eagerly() else self._initializers
Beispiel #54
0
    def get_estimator_eval_metric_ops(self, eval_dict):
        """Returns metric ops for use in tf.estimator.EstimatorSpec.

        Args:
          eval_dict: A dictionary that holds an image, groundtruth, and
            detections for a batched example. Note that, we use only the first
            example for visualization. See
            eval_util.result_dict_for_batched_example() for a convenient method
            for constructing such a dictionary. The dictionary contains
            fields.InputDataFields.original_image: [batch_size, H, W, 3] image.
            fields.InputDataFields.original_image_spatial_shape: [batch_size, 2]
              tensor containing the size of the original image.
            fields.InputDataFields.true_image_shape: [batch_size, 3]
              tensor containing the spatial size of the unpadded original image.
            fields.InputDataFields.groundtruth_boxes - [batch_size, num_boxes, 4]
              float32 tensor with groundtruth boxes in range [0.0, 1.0].
            fields.InputDataFields.groundtruth_classes - [batch_size, num_boxes]
              int64 tensor with 1-indexed groundtruth classes.
            fields.InputDataFields.groundtruth_instance_masks - (optional)
              [batch_size, num_boxes, H, W] int64 tensor with instance masks.
            fields.DetectionResultFields.detection_boxes - [batch_size,
              max_num_boxes, 4] float32 tensor with detection boxes in range
              [0.0, 1.0].
            fields.DetectionResultFields.detection_classes - [batch_size,
              max_num_boxes] int64 tensor with 1-indexed detection classes.
            fields.DetectionResultFields.detection_scores - [batch_size,
              max_num_boxes] float32 tensor with detection scores.
            fields.DetectionResultFields.detection_masks - (optional)
              [batch_size, max_num_boxes, H, W] float32 tensor of binarized
              masks.
            fields.DetectionResultFields.detection_keypoints - (optional)
              [batch_size, max_num_boxes, num_keypoints, 2] float32 tensor with
              keypoints.

        Returns:
          A dictionary of image summary names to tuple of (value_op, update_op).
          The `update_op` is the same for all items in the dictionary, and is
          responsible for saving a single side-by-side image with detections
          and groundtruth. Each `value_op` holds the tf.summary.image string
          for a given image. An empty dictionary is returned when
          `self._max_examples_to_draw` is 0.
        """
        if self._max_examples_to_draw == 0:
            return {}
        images = self.images_from_evaluation_dict(eval_dict)

        def get_images():
            """Returns the stored images, padded to self._max_examples_to_draw.

            Padding entries are scalar uint8 zeros. The stored images are
            cleared via `self.clear()` before returning.
            """
            images = self._images
            while len(images) < self._max_examples_to_draw:
                images.append(np.array(0, dtype=np.uint8))
            self.clear()
            return images

        def image_summary_or_default_string(summary_name, image):
            """Returns an image summary for rank-4 inputs, else an empty string."""
            # Padding elements are scalars, so only real images have rank 4.
            return tf.cond(tf.equal(tf.size(tf.shape(image)), 4),
                           lambda: tf.summary.image(summary_name, image),
                           lambda: tf.constant(''))

        if tf.executing_eagerly():
            # Eager mode: call the Python methods directly; the image must be
            # added before `get_images` reads and clears the stored list.
            update_op = self.add_images([[images[0]]])
            image_tensors = get_images()
        else:
            # Graph mode: wrap the Python callables as ops; only the first
            # image of the batch is recorded.
            update_op = tf.py_func(self.add_images, [[images[0]]], [])
            image_tensors = tf.py_func(get_images, [],
                                       [tf.uint8] * self._max_examples_to_draw)
        eval_metric_ops = {}
        for i, image in enumerate(image_tensors):
            summary_name = self._summary_name_prefix + '/' + str(i)
            value_op = image_summary_or_default_string(summary_name, image)
            eval_metric_ops[summary_name] = (value_op, update_op)
        return eval_metric_ops
    def _testWithMaybeMultiAttention(
        self,
        is_multi,
        create_attention_mechanisms,
        expected_final_output,
        expected_final_state,
        attention_mechanism_depths,
        alignment_history=False,
        expected_final_alignment_history=None,
        attention_layer_sizes=None,
        attention_layers=None,
        create_query_layer=False,
        create_memory_layer=True,
        create_attention_kwargs=None,
    ):
        """Runs an attention-wrapped LSTM decoder and compares it to expectations.

        Builds attention mechanisms over random encoder outputs, wraps an
        LSTM cell with them, decodes random inputs with a `BasicDecoder`,
        checks static shapes, then compares summaries of the evaluated
        outputs and state with the expected values.

        Args:
          is_multi: If true, the whole list of mechanisms (and attention layer
            sizes / layers) is passed to the wrapper; otherwise only the first
            element of each is passed.
          create_attention_mechanisms: Sequence of callables, each called with
            `units`, `memory`, `memory_sequence_length` and the keyword
            arguments from `create_attention_kwargs`. Must have length 1
            unless `is_multi` is true.
          expected_final_output: Structure compared against the summarized
            decoder outputs.
          expected_final_state: Structure compared against the summarized
            final state.
          attention_mechanism_depths: Per-mechanism `units` value, zipped with
            `create_attention_mechanisms`.
          alignment_history: Passed to the wrapper; if true the stacked
            alignment history is shape-checked and compared as well.
          expected_final_alignment_history: Expected summary of the alignment
            history; only used when `alignment_history` is true.
          attention_layer_sizes: Optional sequence of attention layer sizes; a
            falsy entry counts as the encoder output depth when computing the
            expected attention depth.
          attention_layers: Optional sequence of attention layers, used when
            `attention_layer_sizes` is None.
          create_query_layer: If true, a ones-initialized bias-free Dense
            query layer is passed to each mechanism.
          create_memory_layer: If true, a ones-initialized bias-free Dense
            memory layer is passed to each mechanism.
          create_attention_kwargs: Optional extra keyword arguments for every
            mechanism constructor. The caller's dict is not modified.
        """
        # Allow is_multi to be True with a single mechanism to enable test for
        # passing in a single mechanism in a list.
        assert len(create_attention_mechanisms) == 1 or is_multi
        encoder_sequence_length = [3, 2, 3, 1, 1]
        decoder_sequence_length = [2, 0, 1, 2, 3]
        batch_size = 5
        encoder_max_time = 8
        decoder_max_time = 4
        input_depth = 7
        encoder_output_depth = 10
        cell_depth = 9
        # Work on a copy: the loop below injects layer entries, and those must
        # not leak back into a dict owned by the caller.
        create_attention_kwargs = dict(create_attention_kwargs or {})

        if attention_layer_sizes is not None:
            # Compute sum of attention_layer_sizes. Use encoder_output_depth if
            # None.
            attention_depth = sum(
                attention_layer_size or encoder_output_depth
                for attention_layer_size in attention_layer_sizes)
        elif attention_layers is not None:
            # Compute sum of attention_layers output depth.
            attention_depth = sum(
                attention_layer.compute_output_shape(
                    [batch_size, cell_depth +
                     encoder_output_depth]).dims[-1].value
                for attention_layer in attention_layers)
        else:
            attention_depth = encoder_output_depth * len(
                create_attention_mechanisms)

        decoder_inputs = np.random.randn(batch_size, decoder_max_time,
                                         input_depth).astype(np.float32)
        encoder_outputs = np.random.randn(batch_size, encoder_max_time,
                                          encoder_output_depth).astype(
                                              np.float32)

        attention_mechanisms = []
        for creator, depth in zip(create_attention_mechanisms,
                                  attention_mechanism_depths):
            # Create query/memory layers with a deterministic initializer to
            # avoid randomness in the test between graph and eager.
            if create_query_layer:
                create_attention_kwargs["query_layer"] = tf.keras.layers.Dense(
                    depth, kernel_initializer="ones", use_bias=False)
            if create_memory_layer:
                create_attention_kwargs[
                    "memory_layer"] = tf.keras.layers.Dense(
                        depth, kernel_initializer="ones", use_bias=False)

            attention_mechanisms.append(
                creator(
                    units=depth,
                    memory=encoder_outputs,
                    memory_sequence_length=encoder_sequence_length,
                    **create_attention_kwargs,
                ))

        with self.cached_session(use_gpu=True):
            attention_layer_size = attention_layer_sizes
            attention_layer = attention_layers
            if not is_multi:
                # Single-mechanism mode passes scalars rather than lists.
                if attention_layer_size is not None:
                    attention_layer_size = attention_layer_size[0]
                if attention_layer is not None:
                    attention_layer = attention_layer[0]
            cell = tf.keras.layers.LSTMCell(
                cell_depth,
                recurrent_activation="sigmoid",
                kernel_initializer="ones",
                recurrent_initializer="ones",
            )
            cell = wrapper.AttentionWrapper(
                cell,
                attention_mechanisms if is_multi else attention_mechanisms[0],
                attention_layer_size=attention_layer_size,
                alignment_history=alignment_history,
                attention_layer=attention_layer,
            )
            if cell._attention_layers is not None:
                # Seed the attention layers' initializer.
                for layer in cell._attention_layers:
                    layer.kernel_initializer = tf.compat.v1.keras.initializers.glorot_uniform(
                        seed=1337)

            sampler = sampler_py.TrainingSampler()
            my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)
            initial_state = cell.get_initial_state(dtype=tf.float32,
                                                   batch_size=batch_size)
            final_outputs, final_state, _ = my_decoder(
                decoder_inputs,
                initial_state=initial_state,
                sequence_length=decoder_sequence_length,
            )

            self.assertIsInstance(final_outputs,
                                  basic_decoder.BasicDecoderOutput)
            self.assertIsInstance(final_state, wrapper.AttentionWrapperState)

            # The time dimension is only statically known in eager mode.
            expected_time = (max(decoder_sequence_length)
                             if tf.executing_eagerly() else None)
            self.assertEqual(
                (batch_size, expected_time, attention_depth),
                tuple(final_outputs.rnn_output.get_shape().as_list()),
            )
            self.assertEqual(
                (batch_size, expected_time),
                tuple(final_outputs.sample_id.get_shape().as_list()),
            )

            self.assertEqual(
                (batch_size, attention_depth),
                tuple(final_state.attention.get_shape().as_list()),
            )
            self.assertEqual(
                (batch_size, cell_depth),
                tuple(final_state.cell_state[0].get_shape().as_list()),
            )
            self.assertEqual(
                (batch_size, cell_depth),
                tuple(final_state.cell_state[1].get_shape().as_list()),
            )

            if alignment_history:
                if is_multi:
                    state_alignment_history = []
                    for history_array in final_state.alignment_history:
                        history = history_array.stack()
                        self.assertEqual(
                            (expected_time, batch_size, encoder_max_time),
                            tuple(history.get_shape().as_list()),
                        )
                        state_alignment_history.append(history)
                    state_alignment_history = tuple(state_alignment_history)
                else:
                    state_alignment_history = final_state.alignment_history.stack(
                    )
                    self.assertEqual(
                        (expected_time, batch_size, encoder_max_time),
                        tuple(state_alignment_history.get_shape().as_list()),
                    )
                tf.nest.assert_same_structure(
                    cell.state_size,
                    cell.get_initial_state(batch_size=batch_size,
                                           dtype=tf.float32),
                )
                # Remove the history from final_state for purposes of the
                # remainder of the tests.
                final_state = final_state._replace(alignment_history=())  # pylint: disable=protected-access
            else:
                state_alignment_history = ()

            self.evaluate(tf.compat.v1.global_variables_initializer())
            eval_result = self.evaluate({
                "final_outputs":
                final_outputs,
                "final_state":
                final_state,
                "state_alignment_history":
                state_alignment_history,
            })

            final_output_info = tf.nest.map_structure(
                get_result_summary, eval_result["final_outputs"])
            final_state_info = tf.nest.map_structure(
                get_result_summary, eval_result["final_state"])
            print("final_output_info: ", final_output_info)
            print("final_state_info: ", final_state_info)

            # by default, the wrapper emits attention as output
            tf.nest.map_structure(self.assertAllCloseOrEqual,
                                  expected_final_output, final_output_info)
            tf.nest.map_structure(self.assertAllCloseOrEqual,
                                  expected_final_state, final_state_info)
            if alignment_history:
                final_alignment_history_info = tf.nest.map_structure(
                    get_result_summary, eval_result["state_alignment_history"])
                print("final_alignment_history_info: ",
                      final_alignment_history_info)
                tf.nest.map_structure(
                    self.assertAllCloseOrEqual,
                    # outputs are batch major but the stacked TensorArray is
                    # time major
                    expected_final_alignment_history,
                    final_alignment_history_info,
                )
Beispiel #56
0
# this is all preset automatically
from __future__ import absolute_import, division, print_function

import os
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.contrib.eager as tfe

# Turn on eager execution before any other TensorFlow call is made.
tf.enable_eager_execution()

# Report the TensorFlow version and confirm that eager mode is active.
print("TensorFlow version: {}".format(tf.VERSION))
print("Eager execution: {}".format(tf.executing_eagerly()))
#

# this can be uploaded by the user
train_dataset_url = "http://download.tensorflow.org/data/iris_training.csv"
# Fetch the CSV through the Keras file utility, named after the URL's
# final path component; the returned value is the local file path.
train_dataset_fp = tf.keras.utils.get_file(
    fname=os.path.basename(train_dataset_url), origin=train_dataset_url)

print("Local copy of the dataset file: {}".format(train_dataset_fp))

#


# this depends on input - worry about later
def parse_csv(line):
    """Decode one CSV line and gather its leading fields into a feature tensor.

    NOTE(review): no label handling or `return` statement is visible here;
    the definition looks cut off, so as shown it returns None - confirm
    against the full source.
    """
    # Four float defaults followed by one int default.
    example_defaults = [[0.], [0.], [0.], [0.], [0]]  # sets field types
    parsed_line = tf.decode_csv(line, example_defaults)
    # First 4 fields are features, combine into single tensor
    features = tf.reshape(parsed_line[:-1], shape=(4, ))
Beispiel #57
0
#%%
import tensorflow as tf
import tensorflow.keras as K
from tensorflow.keras import layers
from tensorflow.keras import preprocessing
# Report the TensorFlow version, eager-mode status and visible GPUs.
print('TensorFlow version:', tf.__version__)
print('Eager Execution Mode:', tf.executing_eagerly())
print('available GPU:', tf.config.list_physical_devices('GPU'))
from tensorflow.python.client import device_lib
print('==========================================')
# Print the local devices reported by TensorFlow.
print(device_lib.list_local_devices())
# Disable logging of device placement.
tf.debugging.set_log_device_placement(False)
#%%
from tqdm import tqdm
import pandas as pd
import numpy as np
import math
import time
import re
import matplotlib.pyplot as plt
from PIL import Image
from pprint import pprint
import random
from scipy import sparse
import os
os.chdir('/Users/anseunghwan/Documents/GitHub/textmining')

import Modules
#%%
from matplotlib import rc
rc('font', family='AppleGothic')
Beispiel #58
0
 def test_run_one_train_step(self):
     """Run `train_lib.train` with the test hparams (graph mode only)."""
     # `tfgan.gan_model` doesn't work when executing eagerly.
     if not tf.executing_eagerly():
         train_lib.train(self.hparams)
Beispiel #59
0
def dynamic_decode(decoder,
                   impute_finished=False,
                   maximum_iterations=None,
                   parallel_iterations=32,
                   swap_memory=False,
                   scope=None):
    """Perform dynamic decoding with `decoder`.

    Calls initialize() once and step() repeatedly on the Decoder object.

    Args:
      decoder: A `Decoder` instance.
      impute_finished: Python boolean.  If `True`, then states for batch
        entries which are marked as finished get copied through and the
        corresponding outputs get zeroed out.  This causes some slowdown at
        each time step, but ensures that the final state and outputs have
        the correct values and that backprop ignores time steps that were
        marked as finished.
      maximum_iterations: `int32` scalar, maximum allowed number of decoding
        steps.  Default is `None` (decode until the decoder is fully done).
      parallel_iterations: Argument passed to `tf.while_loop`.
      swap_memory: Argument passed to `tf.while_loop`.
      scope: Optional variable scope to use.

    Returns:
      The `pred_ids` attribute of the final decoder state (taken after
      `decoder.finalize` when the decoder implements it).  Note that the
      stacked outputs and the final sequence lengths are computed but are
      not returned.

    Raises:
      TypeError: if `decoder` is not an instance of `Decoder`.
      ValueError: if `maximum_iterations` is provided but is not a scalar.
      ValueError: if running inside an XLA context and `maximum_iterations`
        is `None`.
    """
    if not isinstance(decoder, Decoder):
        raise TypeError("Expected decoder to be type Decoder, but saw: %s" %
                        type(decoder))

    with tf.variable_scope(scope, "decoder") as varscope:
        # Determine context types.
        ctxt = tf.get_default_graph()._get_control_flow_context()  # pylint: disable=protected-access
        is_xla = control_flow_util.GetContainingXLAContext(ctxt) is not None
        in_while_loop = (control_flow_util.GetContainingWhileContext(ctxt)
                         is not None)
        # Properly cache variable values inside the while_loop.
        # Don't set a caching device when running in a loop, since it is possible
        # that train steps could be wrapped in a tf.while_loop. In that scenario
        # caching prevents forward computations in loop iterations from re-reading
        # the updated weights.
        if not tf.executing_eagerly() and not in_while_loop:
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        # Normalize the iteration cap to an int32 tensor and insist on a scalar.
        if maximum_iterations is not None:
            maximum_iterations = tf.convert_to_tensor(
                maximum_iterations, dtype=tf.int32, name="maximum_iterations")
            if maximum_iterations.get_shape().ndims != 0:
                raise ValueError("maximum_iterations must be a scalar")

        initial_finished, initial_inputs, initial_state = decoder.initialize()

        # Zero-valued outputs, used to blank out emissions of finished entries
        # when `impute_finished` is set.
        zero_outputs = _create_zero_outputs(decoder.output_size,
                                            decoder.output_dtype)

        if is_xla and maximum_iterations is None:
            raise ValueError(
                "maximum_iterations is required for XLA compilation.")
        if maximum_iterations is not None:
            # A non-positive cap marks every entry as finished up front.
            initial_finished = tf.logical_or(initial_finished,
                                             0 >= maximum_iterations)
        initial_sequence_lengths = tf.zeros_like(initial_finished,
                                                 dtype=tf.int32)
        initial_time = tf.constant(0, dtype=tf.int32)

        # Output TensorArrays are fixed-size (maximum_iterations) only when
        # running under XLA with a known cap; otherwise they grow dynamically.
        dynamic_size = maximum_iterations is None or not is_xla

        def _create_ta(s, d):
            # One TensorArray per output component, with element shape `s` and
            # dtype `d`.
            return tf.TensorArray(
                dtype=d,
                size=0 if dynamic_size else maximum_iterations,
                dynamic_size=dynamic_size,
                element_shape=s)

        initial_outputs_ta = tf.contrib.framework.nest.map_structure(
            _create_ta, decoder.output_size, decoder.output_dtype)

        def condition(unused_time, unused_outputs_ta, unused_state,
                      unused_inputs, finished, unused_sequence_lengths):
            # Keep looping while at least one entry is not finished.
            return tf.logical_not(tf.reduce_all(finished))

        def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
            """Internal while_loop body.

            Args:
              time: scalar int32 tensor.
              outputs_ta: structure of TensorArray.
              state: (structure of) state tensors and TensorArrays.
              inputs: (structure of) input tensors.
              finished: bool tensor (keeping track of what's finished).
              sequence_lengths: int32 tensor (keeping track of time of finish).

            Returns:
              `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
                next_sequence_lengths)`.
            """
            (next_outputs, decoder_state, next_inputs,
             decoder_finished) = decoder.step(time, inputs, state)
            # Decoders that track completion themselves report the full
            # finished mask; otherwise it is accumulated across steps here.
            if decoder.tracks_own_finished:
                next_finished = decoder_finished
            else:
                next_finished = tf.logical_or(decoder_finished, finished)
            # Entries that were not finished before this step get length
            # `time + 1`; already-finished entries keep their recorded length.
            next_sequence_lengths = tf.where(
                tf.logical_not(finished),
                tf.fill(tf.shape(sequence_lengths), time + 1),
                sequence_lengths)

            # The step must preserve the nesting structure of the loop
            # variables.
            tf.contrib.framework.nest.assert_same_structure(
                state, decoder_state)
            tf.contrib.framework.nest.assert_same_structure(
                outputs_ta, next_outputs)
            tf.contrib.framework.nest.assert_same_structure(
                inputs, next_inputs)

            # Zero out output values past finish
            if impute_finished:
                emit = tf.contrib.framework.nest.map_structure(
                    lambda out, zero: tf.where(finished, zero, out),
                    next_outputs, zero_outputs)
            else:
                emit = next_outputs

            # Copy through states past finish
            def _maybe_copy_state(new, cur):
                # TensorArrays and scalar states get passed through.
                if isinstance(cur, tf.TensorArray):
                    pass_through = True
                else:
                    new.set_shape(cur.shape)
                    pass_through = (new.shape.ndims == 0)
                return new if pass_through else tf.where(finished, cur, new)

            if impute_finished:
                next_state = tf.contrib.framework.nest.map_structure(
                    _maybe_copy_state, decoder_state, state)
            else:
                next_state = decoder_state

            # Record this step's emission at index `time` in every output
            # TensorArray.
            outputs_ta = tf.contrib.framework.nest.map_structure(
                lambda ta, out: ta.write(time, out), outputs_ta, emit)
            return (time + 1, outputs_ta, next_state, next_inputs,
                    next_finished, next_sequence_lengths)

        res = tf.while_loop(condition,
                            body,
                            loop_vars=(
                                initial_time,
                                initial_outputs_ta,
                                initial_state,
                                initial_inputs,
                                initial_finished,
                                initial_sequence_lengths,
                            ),
                            parallel_iterations=parallel_iterations,
                            maximum_iterations=maximum_iterations,
                            swap_memory=swap_memory)

        # Unpack by position; the indices match the order of `loop_vars` above.
        final_outputs_ta = res[1]
        final_state = res[2]
        final_sequence_lengths = res[5]

        final_outputs = tf.contrib.framework.nest.map_structure(
            lambda ta: ta.stack(), final_outputs_ta)

        # Finalization is optional: if the decoder raises NotImplementedError
        # the un-finalized loop results are kept as they are.
        try:
            final_outputs, final_state = decoder.finalize(
                final_outputs, final_state, final_sequence_lengths)
        except NotImplementedError:
            pass

    # NOTE: only the `pred_ids` attribute of the final state is returned;
    # `final_outputs` and `final_sequence_lengths` are discarded.
    return final_state.pred_ids
Beispiel #60
0
                   top_dendrogram=True,
                   row_linkage=lambda x: linkage(x, method='average',
                                                 metric='correlation'),
                   col_linkage=lambda x: linkage(x.T, method='average',
                                                 metric='correlation'),
                   histogram=True)

#https://threader.app/thread/1105139360226140160
import tensorflow as tf
print(tf.__version__)
import datetime
print(datetime.datetime.now())
# Start from a clean Keras state before building the model.
tf.keras.backend.clear_session()
(x_train, y_train), (x_test, y_test) = dfLFQ.load_data()
# Min-max scale each split independently: (x - min) / (max - min).
# The original line had unbalanced parentheses and could not be parsed.
# NOTE(review): builtin min()/max() are kept from the original; they only
# work on one-dimensional data. If `dfLFQ.load_data()` yields
# multi-dimensional arrays, `x.min()` / `x.max()` are presumably what is
# wanted -- confirm against the data source.
x_train, x_test = (
    (x_train - min(x_train)) / (max(x_train) - min(x_train)),
    (x_test - min(x_test)) / (max(x_test) - min(x_test)),
)
print("Eager:",tf.executing_eagerly())
print("GPU:",tf.test.is_gpu_available())#:with tf.device("/gpu:0"):
#tf.keras.backend.clear_session()

def create_model():
  """Build the (uncompiled) Keras classifier used by this script.

  Returns:
    A `tf.keras.models.Sequential` made of, in order: a `Flatten` layer for
    28x28 inputs, a 512-unit ReLU `Dense` layer, a `Dropout` layer with rate
    0.2, and a 10-unit softmax `Dense` output layer.
  """
  keras_layers = tf.keras.layers
  stack = [
      keras_layers.Flatten(input_shape=(28, 28)),
      keras_layers.Dense(512, activation='relu'),
      keras_layers.Dropout(0.2),
      keras_layers.Dense(10, activation='softmax'),
  ]
  return tf.keras.models.Sequential(stack)

# Instantiate the network, then configure training: Adam optimizer, sparse
# categorical cross-entropy loss, accuracy as the reported metric.
model = create_model()
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)