Example #1
    def test_tower_gradient_average(self):

        grad1 = [
            tensorflow.constant(numpy.random.random([10, 20]))
            for _ in range(3)
        ]
        variable1 = tensorflow.ones([10, 20])

        grad2 = [
            tensorflow.constant(numpy.random.random([10, 3, 4]))
            for _ in range(3)
        ]
        variable2 = tensorflow.ones([10, 3, 4])

        sparse_variable = tensorflow.ones([20, 20])
        sparse_grads = [
            tensorflow.IndexedSlices(
                values=tensorflow.constant(numpy.random.random([5, 20])),
                indices=tensorflow.constant([1, 2, 3, 4, 5]),
                dense_shape=tensorflow.shape(sparse_variable))
            for _ in range(3)
        ]

        tower1 = [(grad1[0], variable1), (grad2[0], variable2),
                  (sparse_grads[0], sparse_variable)]
        tower2 = [(grad1[1], variable1), (grad2[1], variable2),
                  (sparse_grads[1], sparse_variable)]
        tower3 = [(grad1[2], variable1), (grad2[2], variable2),
                  (sparse_grads[2], sparse_variable)]

        averages = average_gradients([tower1, tower2, tower3])
        session = tensorflow.Session()
        expected_grad1_mean = numpy.mean(session.run(grad1), 0)
        expected_grad2_mean = numpy.mean(session.run(grad2), 0)
        expected_grad3_mean = numpy.mean(
            session.run([x.values for x in sparse_grads]), 0)
        actual_grad1_mean = session.run(averages[0][0])
        actual_grad2_mean = session.run(averages[1][0])
        actual_grad3_mean = session.run(averages[2][0].values)
        numpy.testing.assert_array_almost_equal(expected_grad1_mean,
                                                actual_grad1_mean)
        numpy.testing.assert_array_almost_equal(expected_grad2_mean,
                                                actual_grad2_mean)
        numpy.testing.assert_array_almost_equal(expected_grad3_mean,
                                                actual_grad3_mean)
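
For orientation, here is a minimal sketch of the kind of average_gradients helper this test exercises; the real implementation belongs to the project under test, so the name, signature, and TF1-style assumptions below are illustrative only.

import tensorflow as tf

def average_gradients_sketch(tower_grads_and_vars):
    """Averages (grad, var) pairs across towers, keeping IndexedSlices sparse."""
    averaged = []
    for pairs in zip(*tower_grads_and_vars):
        grads = [g for g, _ in pairs]
        var = pairs[0][1]
        if isinstance(grads[0], tf.IndexedSlices):
            # Assumes every tower produced slices at the same indices, as in the
            # test above, so the values can be averaged element-wise.
            values = tf.add_n([g.values for g in grads]) / len(grads)
            avg = tf.IndexedSlices(values, grads[0].indices,
                                   dense_shape=grads[0].dense_shape)
        else:
            avg = tf.add_n(grads) / len(grads)
        averaged.append((avg, var))
    return averaged
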
Example #2
  def testAccumulatorTakeGrad(self):
    with self.test_session() as sess:
      q = tf.SparseConditionalAccumulator(tf.float32, name="Q", shape=())

      grad_indexed_slices = tf.IndexedSlices(
          indices=[0, 1], values=np.array([[1, 0], [0, 2]]).astype(np.float32))
      accum_op = q.apply_indexed_slices_grad(grad_indexed_slices)
      accum_op.run()
      accum_op = q.apply_grad([0, 2],
                              np.array([[0, 1], [3, 0]]).astype(np.float32),
                              [3, 2])
      accum_op.run()

      takeg_t = q.take_indexed_slices_grad(1)
      val = sess.run(takeg_t)
      self.assertAllEqual(val.indices, [0, 1, 2])
      self.assertAllEqual(val.values, [[0.5, 0.5], [0, 2], [3, 0]])
      self.assertAllEqual(val.dense_shape, [-1, 2])
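
For reference, the expected values in this test can be reproduced with plain NumPy: the accumulator averages each row by the number of gradients that touched its index (index 0 was touched by both applied gradients, indices 1 and 2 by one each).

import numpy as np

contributions = {0: [np.array([1., 0.]), np.array([0., 1.])],
                 1: [np.array([0., 2.])],
                 2: [np.array([3., 0.])]}
expected = {i: np.mean(vals, axis=0) for i, vals in contributions.items()}
# expected[0] == [0.5, 0.5], expected[1] == [0., 2.], expected[2] == [3., 0.]
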
Example #3
    def testDeprecatedFunctionEndpoint(self, mock_warning):
        array = tf.IndexedSlices(
            tf.compat.v1.convert_to_tensor(np.array([1, 2])),
            tf.compat.v1.convert_to_tensor(np.array([0, 2])))
        mask_indices = tf.compat.v1.convert_to_tensor(np.array([2]))

        self.assertEqual(0, mock_warning.call_count)
        tf.sparse.mask(array, mask_indices)
        self.assertEqual(0, mock_warning.call_count)

        tf.sparse_mask(array, mask_indices)
        self.assertEqual(1, mock_warning.call_count)
        self.assertRegexpMatches(mock_warning.call_args[0][1],
                                 "deprecation_test.py:")
        self.assertRegexpMatches(mock_warning.call_args[0][2], r"sparse_mask")
        self.assertRegexpMatches(mock_warning.call_args[0][3], "sparse.mask")
        tf.sparse_mask(array, mask_indices)
        self.assertEqual(1, mock_warning.call_count)
Example #4
def clip_gradient_norms(gradients_to_variables, max_norm):
    """Clips the gradients by the given value.
    Args:
      gradients_to_variables: A list of gradient to variable pairs (tuples).
      max_norm: the maximum norm value.
    Returns:
      A list of clipped gradient to variable pairs.
    """
    clipped_grads_and_vars = []
    for grad, var in gradients_to_variables:
        if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
                tmp = tf.clip_by_norm(grad.values, max_norm)
                grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
            else:
                grad = tf.clip_by_norm(grad, max_norm)
        clipped_grads_and_vars.append((grad, var))
    return clipped_grads_and_vars
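
A hedged usage sketch for the helper above, assuming TF1 (or tensorflow.compat.v1) is imported as tf, as in the other snippets here; the toy variable and loss exist only for illustration.

weights = tf.Variable([10.0, 10.0, 10.0])
loss = tf.reduce_sum(tf.square(weights))
optimizer = tf.train.GradientDescentOptimizer(0.1)
grads_and_vars = optimizer.compute_gradients(loss, var_list=[weights])
clipped = clip_gradient_norms(grads_and_vars, max_norm=5.0)
train_op = optimizer.apply_gradients(clipped)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)  # the gradient applied to `weights` has norm at most 5.0
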
Example #5
    def _clip_gradients(self, grads_and_vars):
        """In addition to standard gradient clipping, also clips embedding
    gradients to a specified value."""
        grads_and_vars = super(Seq2SeqModel,
                               self)._clip_gradients(grads_and_vars)

        clipped_gradients = []
        variables = []
        for gradient, variable in grads_and_vars:
            if "embedding" in variable.name:
                tmp = tf.clip_by_norm(
                    gradient.values,
                    self.params["optimizer.clip_embed_gradients"])
                gradient = tf.IndexedSlices(tmp, gradient.indices,
                                            gradient.dense_shape)
            clipped_gradients.append(gradient)
            variables.append(variable)
        return list(zip(clipped_gradients, variables))
Example #6
    def _backward(self, loss, opt, summaries=False):
        hps = self.hps

        loss = loss * hps.num_steps

        emb_vars = find_trainable_variables('emb')
        lstm_vars = find_trainable_variables('LSTM')
        softmax_vars = find_trainable_variables('softmax')

        all_vars = emb_vars + lstm_vars + softmax_vars
        grads_and_var = opt.compute_gradients(loss, all_vars)
        grads = [grad for grad, _ in grads_and_var]
        #grads = tf.gradients(loss, all_vars)
        orig_grads = grads[:]
        emb_grads = grads[:len(emb_vars)]
        grads = grads[len(emb_vars):]
        for i in range(len(emb_grads)):
            #assert False
            assert isinstance(emb_grads[i], tf.IndexedSlices)
            emb_grads[i] = tf.IndexedSlices(
                emb_grads[i].values * hps.batch_size, emb_grads[i].indices,
                emb_grads[i].dense_shape)

        lstm_grads = grads[:len(lstm_vars)]
        softmax_grads = grads[len(lstm_vars):]

        lstm_grads, lstm_norm = tf.clip_by_global_norm(lstm_grads,
                                                       hps.max_grad_norm)
        clipped_grads = emb_grads + lstm_grads + softmax_grads
        assert len(clipped_grads) == len(orig_grads)

        if summaries:
            tf.summary.scalar('model/lstm_grad_norm', lstm_norm)
            tf.summary.scalar('model/lstm_grad_scale',
                              tf.minimum(hps.max_grad_norm / lstm_norm, 1.0))
            tf.summary.scalar('model/lstm_weight_norm',
                              tf.global_norm(lstm_vars))
            # for v, g, cg in zip(all_vars, orig_grads, clipped_grads):
            #     name = v.name.lstrip('model/')
            #     tf.histogram_summary(name + '/var', v)
            #     tf.histogram_summary(name + '/grad', g)
            #     tf.histogram_summary(name + '/clipped_grad', cg)

        return list(zip(clipped_grads, all_vars))
Example #7
def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
    """Custom version of tf.clip_by_global_norm that doesn't check numerics."""
    if (
        not isinstance(t_list, collections.Sequence) or
        isinstance(t_list, six.string_types)
    ):
        raise TypeError("t_list should be a sequence")
    t_list = list(t_list)
    if use_norm is None:
        use_norm = tf.global_norm(t_list, name)

    with tf.name_scope(
        name, "clip_by_global_norm", t_list + [clip_norm]
    ) as name:
        # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
        scale = clip_norm * tf.minimum(
            1.0 / use_norm,
            tf.constant(1.0, dtype=use_norm.dtype) / clip_norm
        )

        values = [
            tf.convert_to_tensor(
                t.values if isinstance(t, tf.IndexedSlices) else t,
                name="t_%d" % i
            ) if t is not None else t for i, t in enumerate(t_list)
        ]

        values_clipped = []
        for i, v in enumerate(values):
            if v is None:
                values_clipped.append(None)
            else:
                with tf.colocate_with(v):
                    values_clipped.append(
                        tf.identity(v * scale, name="%s_%d" % (name, i))
                    )

        list_clipped = [
            tf.IndexedSlices(c_v, t.indices, t.dense_shape)
            if isinstance(t, tf.IndexedSlices) else c_v
            for (c_v, t) in zip(values_clipped, t_list)
        ]

    return list_clipped, use_norm
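
A small usage sketch for the custom clip_by_global_norm above, mixing a dense gradient, an IndexedSlices gradient, and a None entry (all of which the function accepts); it assumes TF1 graph mode and the module-level imports the function relies on (tf, collections, six).

dense_grad = tf.constant([[3.0, 4.0]])
sparse_grad = tf.IndexedSlices(values=tf.constant([[6.0, 8.0]]),
                               indices=tf.constant([1]),
                               dense_shape=tf.constant([4, 2]))
clipped, global_norm = clip_by_global_norm([dense_grad, sparse_grad, None],
                                           clip_norm=5.0)
# clipped[1] is still a tf.IndexedSlices and clipped[2] stays None; every
# non-None entry is scaled by clip_norm / max(global_norm, clip_norm).
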
Example #8
  def testMultiplyInverse(self):
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)

      # Create a Fisher Block.
      vocab_size = 5
      block = fb.FullyConnectedKFACBasicFB(
          lc.LayerCollection(),
          diagonal_approx_for_input=True)

      # Add some examples.
      inputs = tf.constant([[0, 1], [1, 2], [2, 3]])
      inputs.one_hot_depth = vocab_size
      outputs = tf.constant([[0.], [1.], [2.]])
      block.register_additional_tower(inputs, outputs)

      # Instantiate factor's variables. Ensure it doesn't fail.
      grads = outputs**2.
      damping = tf.constant(0.)
      block.instantiate_factors(((grads,),), damping)
      block._input_factor.instantiate_cov_variables()
      block._output_factor.instantiate_cov_variables()
      block.register_inverse()
      block._input_factor.instantiate_inv_variables()
      block._output_factor.instantiate_inv_variables()

      # Create a sparse update.
      indices = tf.constant([1, 3, 4])
      values = tf.constant([[1.], [1.], [1.]])
      sparse_vector = tf.IndexedSlices(
          values, indices, dense_shape=[vocab_size, 1])
      dense_vector = tf.reshape([0., 1., 0., 1., 1.], [vocab_size, 1])

      # Compare Fisher-vector product against explicit result.
      result = block.multiply_inverse(sparse_vector)
      expected_result = tf.matrix_solve(block.full_fisher_block(), dense_vector)

      sess.run(tf.global_variables_initializer())
      self.assertAlmostEqual(
          sess.run(expected_result[1]), sess.run(result.values[0]))
      self.assertAlmostEqual(
          sess.run(expected_result[3]), sess.run(result.values[1]))
      self.assertAlmostEqual(
          sess.run(expected_result[4]), sess.run(result.values[2]))
Example #9
    def test_executes_with_indexed_slices(self):
        # TF can represent gradients as tf.IndexedSlices. This test makes sure this
        # case is supported by the optimizer.
        weights = tf.ones([4, 2])
        gradients = tf.IndexedSlices(values=tf.constant([[1.0, 1.0],
                                                         [1.0, 1.0]]),
                                     indices=tf.constant([0, 2]),
                                     dense_shape=tf.constant([4, 2]))
        # Always-zero preconditioner and accumulator, for simplicity of this test.
        optimizer = yogi.build_yogi(0.5,
                                    beta_1=0.0,
                                    beta_2=0.0,
                                    epsilon=1e-7,
                                    initial_preconditioner_value=0.0)

        state = optimizer.initialize(tf.TensorSpec([4, 2]))
        _, weights = optimizer.next(state, weights, gradients)
        self.assertAllClose([[0.5, 0.5], [1.0, 1.0], [0.5, 0.5], [1.0, 1.0]],
                            weights)
Example #10
    def apply(self, updates: Sequence[types.ParameterUpdate],
              parameters: Sequence[tf.Variable]):
        # optimizer_utils.check_distribution_strategy()
        optimizer_utils.check_updates_parameters(updates, parameters)
        self.step.assign_add(1)
        for update, param in zip(updates, parameters):
            if update is not None:
                optimizer_utils.check_same_dtype(update, param)
                learning_rate = tf.cast(self.learning_rate, update.dtype)
                iterations = tf.cast(self.iterations, update.dtype)
                step = tf.cast(self.step, update.dtype)
                if isinstance(update, tf.IndexedSlices):
                    update, indices = optimizer_utils.deduplicate_indexed_slices(update)
                    update = cca_update(g=update, t=step, lr=learning_rate, iterations=iterations)
                    param.scatter_sub(tf.IndexedSlices(update, indices))
                else:
                    update = cca_update(g=update, t=step, lr=learning_rate, iterations=iterations)
                    param.assign_sub(update)  # subtract the gradient * step
Example #11
 def modify_grads(self, grads, emb):
     """
     The TensorFlow autograd gives us Euclidean gradients. Here we multiply by
     (1/4)(1 - ||emb||^2)^2 to convert to the hyperbolic gradient.
     :param grads: a list of tuples of [(grads, name), ...]
     :param emb: A tensor embedding
     :return: The hyperbolic gradient
     """
     scaled_grads = []
     for grad, name in grads:
         vecs = tf.nn.embedding_lookup(emb, grad.indices)
         norm_squared = tf.square(tf.norm(vecs, axis=0))
         hyperbolic_factor = 0.25 * tf.square(1 - norm_squared)
         g = tf.multiply(grad.values, hyperbolic_factor)
         # g_clip = tf.clip_by_value(g, -0.1, 0.1)
         scaled_grad = tf.IndexedSlices(g, grad.indices, grad.dense_shape)
         scaled_grads.append((scaled_grad, name))
     # scaled_theta_grad = [(tf.clip_by_value(tf.scatter_div(g, g.indices, radius), -1, 1), v) for g, v in grads]
     return scaled_grads
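
A quick NumPy check of the rescaling factor stated in the docstring, applied to a single toy vector:

import numpy as np

x = np.array([0.3, 0.4])                   # example embedding row, ||x|| = 0.5
factor = 0.25 * (1.0 - np.dot(x, x)) ** 2  # 0.25 * (1 - 0.25)^2 = 0.140625
hyperbolic_grad = factor * np.array([1.0, -2.0])  # rescale a toy Euclidean gradient
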
Example #12
    def to_dense(self, scope=None):
        with tf.name_scope(scope, 'BoxListSparseToDense'):
            tensor_dict = {}
            flatten_indices = self.indices[:, 0] * self.dense_shape[1] + self.indices[:, 1]
            for field in self.data.get_all_fields():
                if field != "is_valid":
                    sparse_field_data = self.data.get_field(field)
                    field_specific_shape = shape_utils.combined_static_and_dynamic_shape(
                        sparse_field_data)[1:]

                    if len(field_specific_shape) > 0:
                        flatten_dense_field_data = tf.IndexedSlices(
                            values=sparse_field_data,
                            indices=flatten_indices,
                            dense_shape=tf.concat(
                                [
                                    [self.dense_shape[0] * self.dense_shape[1]],
                                    tf.cast(field_specific_shape, tf.int64)
                                ],
                                axis=0
                            )
                        )
                        tensor_dict[field] = tf.reshape(
                            tf.convert_to_tensor(flatten_dense_field_data),
                            [self.dense_shape[0], self.dense_shape[1]] + field_specific_shape
                        )
                    else:
                        tensor_dict[field] = tf.sparse.to_dense(
                            tf.SparseTensor(
                                self.indices, sparse_field_data, self.dense_shape
                            )
                        )
            mask = tf.SparseTensor(
                self.indices,
                tf.ones([tf.shape(self.indices)[0]], dtype=tf.bool),
                self.dense_shape
            )
            mask = tf.sparse.to_dense(mask, default_value=False)
            tensor_dict['is_valid'] = mask
            dense = BoxList.from_tensor_dict(tensor_dict)
            for tracking in self.get_all_trackings():
                dense.set_tracking(tracking, self.get_tracking(tracking))
            return dense
Example #13
File: utils_tf.py  Project: kitlien/tf-dann
def multiply_gradients(grads_and_vars, gradient_multipliers):
    """Multiply specified gradients.

    Args:
        grads_and_vars: A list of gradient to variable pairs (tuples).
        gradient_multipliers: A map from either `Variables` or `Variable` op names
            to the coefficient by which the associated gradient should be scaled.

    Returns:
        The updated list of gradient to variable pairs.

    Raises:
        ValueError: If `grads_and_vars` is not a list or if `gradient_multipliers`
        is empty or None or if `gradient_multipliers` is not a dictionary.
        
    References:
        tensorflow/contrib/slim/python/slim/learning.py
    """
    if not isinstance(grads_and_vars, list):
        raise ValueError('`grads_and_vars` must be a list.')
    if not gradient_multipliers:
        raise ValueError('`gradient_multipliers` is empty.')
    if not isinstance(gradient_multipliers, dict):
        raise ValueError('`gradient_multipliers` must be a dict.')

    multiplied_grads_and_vars = []
    for grad, var in grads_and_vars:
        if var in gradient_multipliers or var.op.name in gradient_multipliers:
            key = var if var in gradient_multipliers else var.op.name
            if grad is None:
                raise ValueError('Requested multiple of `None` gradient.')

            multiplier = gradient_multipliers[key]
            if not isinstance(multiplier, tf.Tensor):
                multiplier = tf.constant(multiplier, dtype=grad.dtype)

            if isinstance(grad, tf.IndexedSlices):
                tmp = grad.values * multiplier
                grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
            else:
                grad *= multiplier
        multiplied_grads_and_vars.append((grad, var))
    return multiplied_grads_and_vars
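
A hedged usage sketch for multiply_gradients, again assuming a TF1-style tf module as in the snippets above; the toy variables and loss are illustrative only.

var_a = tf.Variable([1.0, 2.0], name='var_a')
var_b = tf.Variable([3.0, 4.0], name='var_b')
loss = tf.reduce_sum(tf.square(var_a)) + tf.reduce_sum(tf.square(var_b))
optimizer = tf.train.AdamOptimizer(1e-3)
grads_and_vars = optimizer.compute_gradients(loss, var_list=[var_a, var_b])
# Scale var_a's gradient by 0.1; keys may be Variables or variable op names.
grads_and_vars = multiply_gradients(grads_and_vars, {var_a.op.name: 0.1})
train_op = optimizer.apply_gradients(grads_and_vars)
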
Example #14
def reduce(tensor, root_rank, average=True, fusion=0, fusion_id=-1):
    """NPU implemented reduce"""
    basic = NPUBasics("")
    size = basic.size()
    # the tensor is the instance of tf.IndexedSlices
    if isinstance(tensor, tf.IndexedSlices):
        # For IndexedSlices, do two allgathers instead of a reduce.
        logging.debug("HcomAllgather...")
        values = hccl_ops.allgather(tensor.values, size)
        indices = hccl_ops.allgather(tensor.indices, size)

        if values is None:
            raise ValueError(
                'the result of tf.HcomAllgather([tensor.values]) is empty')
        if indices is None:
            raise ValueError(
                'the result of tf.HcomAllgather([tensor.indices]) is empty')

        # To make this operation into an average, divide all gathered values by the size.
        rank_size = tf.cast(size, tensor.values.dtype)
        new_values = tf.div(values, rank_size) if average else values

        return tf.IndexedSlices(new_values,
                                indices,
                                dense_shape=tensor.dense_shape)

    logging.debug("HcomReduce...")
    local_rank_id = os.getenv('DEVICE_ID')
    if local_rank_id is None or int(local_rank_id) < 0:
        raise ValueError(
            'Please set the correct RANK_ID value, current RANK_ID is:',
            local_rank_id)

    summed_tensor = hccl_ops.reduce(tensor, "sum", root_rank, fusion,
                                    fusion_id)
    if summed_tensor is None:  # and summed_tensor:
        raise ValueError('the result of tf.DavinciReduce([tensor]) is empty')
    if root_rank != int(local_rank_id):
        return summed_tensor
    rank_size = tf.cast(size, dtype=tensor.dtype)
    new_tensor = tf.div(summed_tensor, rank_size) if average else summed_tensor
    return new_tensor
Example #15
def allreduce(tensor, grace, average=True, device_dense='', device_sparse=''):
    """Perform an allreduce on a tf.Tensor or tf.IndexedSlices.

    This function performs a bandwidth-optimal ring allreduce on the input
    tensor. If the input is a tf.IndexedSlices, the function instead does an
    allgather on the values and the indices, effectively doing an allreduce on
    the represented tensor.

    Arguments:
        tensor: tf.Tensor, tf.Variable, or tf.IndexedSlices to reduce.
                The shape of the input must be identical across all ranks.
        average: If True, computes the average over all ranks.
                 Otherwise, computes the sum over all ranks.
        device_dense: Device to be used for dense tensors. Uses GPU by default
                      if Horovod was built with HOROVOD_GPU_ALLREDUCE.
        device_sparse: Device to be used for sparse tensors. Uses GPU by default
                       if Horovod was built with HOROVOD_GPU_ALLGATHER.
        grace: Compression algorithm used to reduce the amount of data
                     sent and received by each worker node.  Defaults to not
                     using compression.

    Returns:
        A tensor of the same shape and type as `tensor`, summed across all
        processes.
    """
    if isinstance(tensor, tf.IndexedSlices):
        with tf.device(device_sparse):
            # For IndexedSlices, do two allgathers instead of an allreduce.
            horovod_size = tf.cast(size(), tensor.values.dtype)
            values = allgather(tensor.values)
            indices = allgather(tensor.indices)

            # To make this operation into an average, divide allgathered values by
            # the Horovod size.
            new_values = (values / horovod_size) if average else values
        return tf.IndexedSlices(new_values,
                                indices,
                                dense_shape=tensor.dense_shape)
    else:
        with tf.device(device_dense):
            new_tensor = grace.step(tensor)
        return new_tensor
Example #16
  def _apply_sparse(self, grad, var):
    """
    :param tf.IndexedSlices grad:
    :param tf.Variable var:
    :return: group of update operations
    :rtype: tf.Operation
    """
    beta2_power = tf.cast(self._beta2_power, var.dtype.base_dtype)
    lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = tf.cast(self._epsilon_t, var.dtype.base_dtype)
    mu_t = tf.cast(self._mu_t, var.dtype.base_dtype)
    mu_t_next = tf.cast(self._mu_t_next, var.dtype.base_dtype)
    mu_prod_t_next = tf.cast(self._mu_prod_t_next, var.dtype.base_dtype)
    mu_prod_t_next2 = tf.cast(self._mu_prod_t_next2, var.dtype.base_dtype)

    m_prev = self.get_slot(var, "m")
    v_prev = self.get_slot(var, "v")

    # called m_t in paper
    m = beta1_t * m_prev
    m = tf.assign(m_prev, m, use_locking=self._use_locking)
    m = tf.scatter_add(m, grad.indices, (1 - beta1_t) * grad.values, use_locking=self._use_locking)
    m_update = m
    m_ = m / (1 - mu_prod_t_next2)  # bias correction (with momentum schedule (include the next t+1))

    # called n_t in paper
    v = beta2_t * v_prev
    v = tf.assign(v_prev, v, use_locking=self._use_locking)
    v = tf.scatter_add(v, grad.indices, (1 - beta2_t) * (grad.values * grad.values), use_locking=self._use_locking)
    v_update = v
    v_ = v / (1 - beta2_power)

    m__ = tf.sparse_add(
      mu_t_next * m_,
      tf.IndexedSlices((1 - mu_t) * grad.values / (1 - mu_prod_t_next), grad.indices, grad.dense_shape))

    step = lr_t * m__ / (tf.sqrt(v_) + epsilon_t)
    var_update = tf.assign_sub(var, step, use_locking=self._use_locking)

    return tf.group(var_update, m_update, v_update)
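
For reference, a dense-form sketch of the update the sparse code above appears to implement (Nadam-style; per the code comment, mu_prod_t_next is taken as the product of the momentum schedule mu through step t and mu_prod_t_next2 as the product through t + 1); the sparse version applies g_t only at the rows in grad.indices via scatter_add and sparse_add:

\begin{aligned}
m_t &= \beta_1 m_{t-1} + (1-\beta_1)\, g_t, &
n_t &= \beta_2 n_{t-1} + (1-\beta_2)\, g_t^2, \\
\hat m_t &= \frac{m_t}{1 - \prod_{i=1}^{t+1} \mu_i}, &
\hat n_t &= \frac{n_t}{1 - \beta_2^{t}}, \\
\bar m_t &= \mu_{t+1}\, \hat m_t + \frac{(1-\mu_t)\, g_t}{1 - \prod_{i=1}^{t} \mu_i}, &
\theta_t &= \theta_{t-1} - \frac{\eta\, \bar m_t}{\sqrt{\hat n_t} + \epsilon}.
\end{aligned}
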
Example #17
 def merge_grads(self):
     indices_grads = {}
     grads = {}
     for ts in self.sub_tensors:
         for g, v in ts.grad:
             if isinstance(g, tf.IndexedSlices):
                 if v not in indices_grads:
                     indices_grads[v] = []
                 indices_grads[v].append(g)
             else:
                 if v not in grads:
                     grads[v] = []
                 grads[v].append(g)
     results = [(tf.reduce_mean(grads[v], axis = 0),v) for v in grads]
     for v in indices_grads:
         indices = tf.concat([g.indices for g in indices_grads[v]], axis = 0)
         values = tf.concat([g.values for g in indices_grads[v]], axis=0)
         g = tf.IndexedSlices(values, indices)
         results.append((g, v))
     return results
Example #18
    def _clip_gradients_fn(self, grads_and_vars):
        """Clips gradients by global norm."""
        gradients, variables = zip(*grads_and_vars)
        self._grads_and_vars = grads_and_vars

        if self._clip_gradients > 0.0:
            clipped_gradients, _ = tf.clip_by_global_norm(
                t_list=gradients, clip_norm=self._clip_gradients)
            grads_and_vars = list(zip(clipped_gradients, variables))
        if self._clip_embed_gradients > 0.0:
            clipped_gradients = []
            variables = []
            for gradient, variable in grads_and_vars:
                if "embedding" in variable.name or "Embedding" in variable.name:
                    tmp = tf.clip_by_norm(t=gradient.values, clip_norm=self._clip_embed_gradients)
                    gradient = tf.IndexedSlices(tmp, gradient.indices, gradient.dense_shape)
                clipped_gradients.append(gradient)
                variables.append(variable)
            grads_and_vars = list(zip(clipped_gradients, variables))
        return grads_and_vars
Example #19
    def restore_checkpoint(self, checkpoint_path, distributed=False):
        chunks = math.ceil(self.embedding_table.shape[0] /
                           _embedding_checkpoint_batch)
        for i in range(chunks):
            filename = get_variable_path(checkpoint_path, self.feature_name, i)
            start = i * _embedding_checkpoint_batch
            numpy_arr = np.load(file=filename)

            if distributed:
                numpy_arr = np.split(
                    numpy_arr, axis=1,
                    indices_or_sections=hvd.size())[hvd.rank()]

            indices = tf.range(start=start,
                               limit=start + numpy_arr.shape[0],
                               dtype=tf.int32)
            update = tf.IndexedSlices(values=numpy_arr,
                                      indices=indices,
                                      dense_shape=self.embedding_table.shape)
            self.embedding_table.scatter_update(sparse_delta=update)
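
A toy sketch of the scatter_update pattern used above: an IndexedSlices delta overwrites a contiguous block of rows of the embedding variable (all names below are illustrative; TF2 eager mode assumed).

import tensorflow as tf

table = tf.Variable(tf.zeros([6, 2]))
chunk = tf.constant([[1.0, 1.0], [2.0, 2.0]])   # rows loaded from one checkpoint shard
indices = tf.range(start=2, limit=2 + chunk.shape[0], dtype=tf.int32)
table.scatter_update(tf.IndexedSlices(values=chunk, indices=indices,
                                      dense_shape=table.shape))
# Rows 2 and 3 of `table` now hold `chunk`; the other rows stay zero.
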
Example #20
 def _allreduce_grad_one_by_one(self, tensor):
     n_workers = tf.cast(self.n_workers, dtype=tensor.dtype)
     if isinstance(tensor, tf.IndexedSlices):
         dedup_tensor = self._create_deduplicated_indexed_slices(
             tensor.indices, tensor.values, tensor.dense_shape)
         indices = self._allgather([dedup_tensor.indices],
                                   self.indices_compression)[0]
         values = self._allgather([dedup_tensor.values],
                                  self.values_compression)[0]
         if self.average:
             values = tf.div(values, n_workers)
         return tf.IndexedSlices(indices=indices,
                                 values=values,
                                 dense_shape=tensor.dense_shape)
     else:
         summed_tensor = self._allreduce([tensor],
                                         self.values_compression)[0]
         if self.average:
             summed_tensor = tf.div(summed_tensor, n_workers)
         return summed_tensor
Example #21
		def average_sparse(grad_and_vars):
			if len(grad_and_vars) == 1:
				return grad_and_vars[0][0]

			indices = []
			values = []
			for g, _ in grad_and_vars:
				indices += [g.indices]
				values += [g.values]
			
			indices = tf.cast(tf.concat(indices, 0), tf.int32)
			values = tf.concat(values, 0)
			# Average tf.IndexedSlices
			sorted_indices = tf.py_func(np.sort, [indices], tf.int32, stateful=False)
			sorted_indices_ = tf.py_func(np.argsort, [indices], tf.int64, stateful=False)
			sorted_sparse_values = tf.gather(values, sorted_indices_)
			unique_sorted_indices, segment_ids = tf.unique(sorted_indices)
			mean_values = tf.segment_sum(sorted_sparse_values, segment_ids) / config.num_gpus
	
			return tf.IndexedSlices(mean_values, unique_sorted_indices, grad_and_vars[0][0].dense_shape)
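
A NumPy sketch of what average_sparse computes for two GPUs: slice values landing on the same row index are summed, and everything is divided by the GPU count (config.num_gpus above).

import numpy as np

indices = np.array([0, 2, 2, 3])                 # rows gathered from 2 towers
values = np.array([[1.0], [2.0], [4.0], [6.0]])  # matching slice values
order = np.argsort(indices)
uniq, seg = np.unique(indices[order], return_inverse=True)
sums = np.zeros((len(uniq), values.shape[1]))
np.add.at(sums, seg, values[order])
mean_values = sums / 2                           # divide by num_gpus
# uniq == [0, 2, 3]; mean_values == [[0.5], [3.0], [3.0]]
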
Example #22
    def _clip_sparse(self, grad, var):
        assert isinstance(grad, tf.IndexedSlices)
        clip_dims = self._vars_to_clip_dims[var]
        if 0 in clip_dims:
            # `x.op` doesn't work in eager execution.
            name = var.name if tf.executing_eagerly() else var.op.name
            tf.compat.v1.logging.warning(
                'Clipping norm across dims %s for %s is inefficient '
                'when including sparse dimension 0.', clip_dims, name)
            return self._clip_dense(var)

        with tf.compat.v1.colocate_with(var):
            var_subset = tf.gather(var, grad.indices)
        with self._maybe_colocate_with(var):
            normalized_var_subset = tf.clip_by_norm(var_subset, self._max_norm,
                                                    clip_dims)
            delta = tf.IndexedSlices(var_subset - normalized_var_subset,
                                     grad.indices, grad.dense_shape)
        with tf.compat.v1.colocate_with(var):
            return var.scatter_sub(delta, use_locking=self._use_locking)
Example #23
  def testWithIndexedSlicesDependencies(self):
    with self.test_session():
      v = tf.Variable(
          np.array([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]]).astype(np.float32))
      v_at_1 = tf.IndexedSlices(v, tf.constant([1]))
      gather_v_at_1 = tf.gather(v_at_1.values, v_at_1.indices)
      v_at_1_after_init = control_flow_ops.with_dependencies([v.initializer],
                                                             v_at_1)
      gather_v_at_1_after_init = tf.gather(
          v_at_1_after_init.values, v_at_1_after_init.indices)

      # Fetching gather_v_at_1 will result in an uninitialized error
      with self.assertRaisesOpError("Attempting to use uninitialized value"):
        gather_v_at_1.eval()

      # Getting gather_v_at_1_after_init will work, and initialize v.
      self.assertAllEqual([[10.0, 11.0]], gather_v_at_1_after_init.eval())

      # Double check that 'v' is initialized
      self.assertAllClose([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]], v.eval())
Example #24
 def _setupSparse(self, is_distributed, dtype, sess):
     with self._maybeWithDevice("/job:ps" if is_distributed else None):
         var0 = tf.Variable([[0.0, 1.0], [2.0, 3.0], [4.0, 5.0]],
                            dtype=dtype)
         var1 = tf.Variable([[0.0, 1.0], [0.0, 3.0], [0.0, 5.0]],
                            dtype=dtype)
     with self._maybeWithDevice("/job:worker" if is_distributed else None):
         grads = tf.IndexedSlices(
             tf.constant([[0.1, 0.1], [0.1, 0.1]], dtype=dtype), [0, 2],
             [3, 2])
         sgd = tf.compat.v1.train.GradientDescentOptimizer(3.0)
         clip_opt = tfgan.features.VariableClippingOptimizer(
             sgd, {
                 var0: [1],
                 var1: [0]
             }, 2.0)
         update_op = clip_opt.apply_gradients(
             list(zip([grads, grads], [var0, var1])))
         sess.run(tf.compat.v1.global_variables_initializer())
     return var0, var1, update_op
Example #25
File: utils.py  Project: zerocurve/seed_rl
    def append(self, env_ids, values):
        """Appends values and returns completed unrolls.

    Args:
      env_ids: 1D tensor with the list of environment IDs for which we append
        data.
        There must not be duplicates.
      values: Values to add for each environment. This is a structure
        (in the tf.nest sense) of tensors following "timestep_specs", with a
        batch front dimension which must be equal to the length of 'env_ids'.

    Returns:
      A pair of:
        - 1D tensor of the environment IDs of the completed unrolls.
        - Completed unrolls. This is a structure of tensors following
          'timestep_specs', with added front dimensions: [num_completed_unrolls,
          num_overlapping_steps + unroll_length + 1].
    """
        tf.debugging.assert_equal(
            tf.shape(env_ids),
            tf.shape(tf.unique(env_ids)[0]),
            message=f'Duplicate environment ids in store {self.name}')

        tf.nest.map_structure(
            lambda s: tf.debugging.assert_equal(
                tf.shape(env_ids)[0],
                tf.shape(s)[0],
                message=(
                    f'Batch dimension must equal the number of environments '
                    f'in store {self.name}.')), values)

        curr_indices = self._index.sparse_read(env_ids)
        unroll_indices = tf.stack([env_ids, curr_indices], axis=-1)
        for s, v in zip(tf.nest.flatten(self._state), tf.nest.flatten(values)):
            s.scatter_nd_update(unroll_indices, v)

        # Intentionally not protecting against out-of-bounds to make it possible to
        # detect completed unrolls.
        self._index.scatter_add(tf.IndexedSlices(1, env_ids))

        return self._complete_unrolls(env_ids)
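
A small TF2 sketch of the scatter_add call near the end of append: an IndexedSlices delta bumps the per-environment write index only for the rows listed in env_ids (variable names here are illustrative).

import tensorflow as tf

index = tf.Variable([0, 0, 0, 0], dtype=tf.int32)
env_ids = tf.constant([1, 3])
index.scatter_add(tf.IndexedSlices(tf.ones_like(env_ids), env_ids))
# index is now [0, 1, 0, 1]
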
Example #26
  def testMultiplyInverseTranspose(self):
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)

      vocab_size = 5
      block = fb.EmbeddingKFACMultiIndepFB(lc.LayerCollection(), vocab_size)

      inputs = [tf.constant([[0, 1], [1, 2], [2, 3]]),
                tf.constant([[0.1], [0.], [0.]])]
      outputs = [tf.constant([[0.], [1.], [2.]]),
                 tf.constant([[0, 0], [0, 0], [0, 4]])]
      block.register_additional_tower(inputs, outputs, transpose=[False, True])

      grads = [output**2 for output in outputs]
      damping = tf.constant(0.)
      block.instantiate_factors(((grads,),), damping)

      block._input_factor.instantiate_cov_variables()
      block._output_factor.instantiate_cov_variables()
      block.register_inverse()
      block._input_factor.instantiate_inv_variables()
      block._output_factor.instantiate_inv_variables()

      # Create a sparse update.
      indices = tf.constant([1, 3, 4])
      values = tf.constant([[1.], [1.], [1.]])
      sparse_vector = tf.IndexedSlices(
          values, indices, dense_shape=[vocab_size, 1])
      dense_vector = tf.reshape([0., 1., 0., 1., 1.], [vocab_size, 1])

      # Compare Fisher-vector product against explicit result.
      result = block.multiply_inverse(sparse_vector)
      expected_result = tf.matrix_solve(block.full_fisher_block(), dense_vector)

      sess.run(tf.global_variables_initializer())
      self.assertAlmostEqual(
          sess.run(expected_result[1]), sess.run(result.values[0]))
      self.assertAlmostEqual(
          sess.run(expected_result[3]), sess.run(result.values[1]))
      self.assertAlmostEqual(
          sess.run(expected_result[4]), sess.run(result.values[2]))
Example #27
    def _backward(self, loss, summaries=False):
        hps = self.hps

        loss = loss * hps.num_steps

        emb_vars = find_trainable_variables("emb")
        lstm_vars = find_trainable_variables("LSTM")
        softmax_vars = find_trainable_variables("softmax")

        all_vars = emb_vars + lstm_vars + softmax_vars
        grads = tf.gradients(loss, all_vars)
        orig_grads = grads[:]
        emb_grads = grads[:len(emb_vars)]
        grads = grads[len(emb_vars):]
        for i in range(len(emb_grads)):
            assert isinstance(emb_grads[i], tf.IndexedSlices)
            emb_grads[i] = tf.IndexedSlices(
                emb_grads[i].values * hps.batch_size, emb_grads[i].indices,
                emb_grads[i].dense_shape)

        lstm_grads = grads[:len(lstm_vars)]
        softmax_grads = grads[len(lstm_vars):]

        lstm_grads, lstm_norm = tf.clip_by_global_norm(lstm_grads,
                                                       hps.max_grad_norm)
        clipped_grads = emb_grads + lstm_grads + softmax_grads
        assert len(clipped_grads) == len(orig_grads)

        if summaries:
            tf.scalar_summary("model/lstm_grad_norm", lstm_norm)
            tf.scalar_summary("model/lstm_grad_scale",
                              tf.minimum(hps.max_grad_norm / lstm_norm, 1.0))
            tf.scalar_summary("model/lstm_weight_norm",
                              tf.global_norm(lstm_vars))
            # for v, g, cg in zip(all_vars, orig_grads, clipped_grads):
            #     name = v.name.lstrip("model/")
            #     tf.histogram_summary(name + "/var", v)
            #     tf.histogram_summary(name + "/grad", g)
            #     tf.histogram_summary(name + "/clipped_grad", cg)

        return list(zip(clipped_grads, all_vars))
Example #28
    def testClipByGlobalNormWithIndexedSlicesClipped(self):
        # Norm clipping when clip_norm < 5
        with self.test_session():
            x0 = tf.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
            x1 = tf.IndexedSlices(tf.constant([1.0, -2.0]), tf.constant([3,
                                                                         4]))
            # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
            clip_norm = 4.0

            # Answers are the original tensors scaled by 4.0/5.0
            np_ans_0 = [[-1.6, 0.0, 0.0], [3.2, 0.0, 0.0]]
            np_ans_1 = [0.8, -1.6]

            ans, norm = tf.clip_by_global_norm([x0, x1], clip_norm)
            tf_ans_1 = ans[0].eval()
            tf_ans_2 = ans[1].values.eval()
            tf_norm = norm.eval()

        self.assertAllClose(tf_norm, 5.0)
        self.assertAllClose(np_ans_0, tf_ans_1)
        self.assertAllClose(np_ans_1, tf_ans_2)
Example #29
  def _clip_gradients(self, grad):
    """Clips gradients if the hyperparameter `gradient_clip_norm` requires it.

    Sparse tensors, in the form of IndexedSlices returned for the
    gradients of embeddings, require special handling.

    Args:
      grad: Gradient Tensor, IndexedSlices, or None.

    Returns:
      Optionally clipped gradient.
    """
    if grad is not None and self.hyperparams.gradient_clip_norm > 0:
      logging.info('Clipping gradient %s', grad)
      if isinstance(grad, tf.IndexedSlices):
        tmp = tf.clip_by_norm(grad.values, self.hyperparams.gradient_clip_norm)
        return tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
      else:
        return tf.clip_by_norm(grad, self.hyperparams.gradient_clip_norm)
    else:
      return grad
Example #30
    def testBasic(self):
        values = np.random.rand(4, 4).astype(np.single)
        indices = np.array([0, 2, 3, 4], dtype=np.int32)
        mask_indices = np.array([0], dtype=np.int32)

        out_values = values[1:, :]
        out_indices = np.array([2, 3, 4], dtype=np.int32)

        with self.test_session() as sess:
            values_tensor = tf.convert_to_tensor(values)
            indices_tensor = tf.convert_to_tensor(indices)
            mask_indices_tensor = tf.convert_to_tensor(mask_indices)

            t = tf.IndexedSlices(values_tensor, indices_tensor)
            masked_t = tf.sparse_mask(t, mask_indices_tensor)

            tf_out_values, tf_out_indices = sess.run(
                [masked_t.values, masked_t.indices])

            self.assertAllEqual(tf_out_values, out_values)
            self.assertAllEqual(tf_out_indices, out_indices)