def testLayoutAndMeshShape(self):
    # Like the preceding test, but the 4x2 mesh shape is inferred by
    # auto_mtf instead of being supplied explicitly.
    graph = mtf.Graph()
    mesh = mtf.Mesh(graph, "my_mesh")
    x = mtf.zeros(mesh, "a:10,b:5")
    y = mtf.zeros(mesh, "b:5,c:20")
    z = mtf.einsum([x, y], "a:10,c:20")

    layout, mesh_shape = mtf.auto_mtf.layout_and_mesh_shape(graph, 8, [z])

    a_dim, b_dim, c_dim = (
        mtf.convert_to_dimension(pair)
        for pair in (("a", 10), ("b", 5), ("c", 20)))

    # "a" and "c" get split across the two inferred mesh axes; the
    # contracted dimension "b" stays unsplit.
    self.assertEqual(layout.tensor_dimension_to_mesh_axis(a_dim, mesh_shape), 1)
    self.assertIsNone(layout.tensor_dimension_to_mesh_axis(b_dim, mesh_shape))
    self.assertEqual(layout.tensor_dimension_to_mesh_axis(c_dim, mesh_shape), 0)

    self.assertCountEqual(
        mesh_shape.dims,
        [mtf.Dimension("mesh_0", 4), mtf.Dimension("mesh_1", 2)])

    # Capping the mesh at a single dimension of size 8: nothing is split.
    layout, mesh_shape = mtf.auto_mtf.layout_and_mesh_shape(
        graph, 8, [z], 1)

    for dim in (a_dim, b_dim, c_dim):
        self.assertIsNone(layout.tensor_dimension_to_mesh_axis(dim, mesh_shape))

    self.assertCountEqual(mesh_shape.dims, [mtf.Dimension("mesh_0", 8)])
Esempio n. 2
0
    def testLayout(self):
        # Build a tiny Mesh TensorFlow graph: z = einsum(x, y).
        graph = mtf.Graph()
        mesh = mtf.Mesh(graph, "my_mesh")
        x = mtf.zeros(mesh, "a:10,b:5")
        y = mtf.zeros(mesh, "b:5,c:20")
        z = mtf.einsum([x, y], "a:10,c:20")

        # Fix the mesh shape up front: a 4x2 mesh.
        mesh_shape = mtf.convert_to_shape("m1:4,m2:2")

        # Ask auto_mtf for a layout.  Passing the outputs matters to the
        # optimization, since output tensors can never be freed.
        layout = mtf.auto_mtf.layout(graph, mesh_shape, [z])

        a_dim, b_dim, c_dim = (
            mtf.convert_to_dimension(pair)
            for pair in (("a", 10), ("b", 5), ("c", 20)))

        # The un-contracted dimensions are split; the contracted "b" is not.
        self.assertEqual(
            layout.tensor_dimension_to_mesh_axis(a_dim, mesh_shape), 1)
        self.assertIsNone(
            layout.tensor_dimension_to_mesh_axis(b_dim, mesh_shape))
        self.assertEqual(
            layout.tensor_dimension_to_mesh_axis(c_dim, mesh_shape), 0)
Esempio n. 3
0
    def __init__(
            self,  # pylint: disable=super-init-not-called
            key_heads_dims=(("heads", 12), ),
            softmax_heads_dims=(("heads", 12), ),
            value_heads_dims=(("heads", 12), ),
            key_size=64,
            value_size=64,
            dropout_rate=0.0,
            relative_attention_type=None,
            relative_attention_num_buckets=32,
            dynamic_projections=None,
            dynamic_projections_init_scale=1e-2):
        """Create a SelfAttention Layer.

        Args:
          key_heads_dims: a list of mtf.Dimension or (name, size) pairs
          softmax_heads_dims: a list of mtf.Dimension or (name, size) pairs
          value_heads_dims: a list of mtf.Dimension or (name, size) pairs
          key_size: an integer
          value_size: an integer
          dropout_rate: a float
          relative_attention_type: an optional string - one of
            (None, "bias", "bias_shared", "contextual")
          relative_attention_num_buckets: an integer
          dynamic_projections: an optional sequence containing a subset of
            ["x2l", "m2l", "x2w", "m2w"] (see class comments)
          dynamic_projections_init_scale: a float - initializer variance
            scaling factor for these dynamic projections.  We have observed
            learning difficulties when this value is too large.
        """
        def _as_dims(pairs):
            # Normalize mtf.Dimension objects / (name, size) pairs.
            return [mtf.convert_to_dimension(p) for p in pairs]

        self.key_heads_dims = _as_dims(key_heads_dims)
        self.softmax_heads_dims = _as_dims(softmax_heads_dims)
        self.value_heads_dims = _as_dims(value_heads_dims)
        self.key_dim = mtf.Dimension("d_k", key_size)
        self.value_dim = mtf.Dimension("d_v", value_size)
        self.dropout_rate = dropout_rate
        self.relative_attention_type = relative_attention_type
        self.relative_attention_num_buckets = relative_attention_num_buckets
        # No dynamic projections by default.
        self.dynamic_projections = dynamic_projections or []
        self.dynamic_projections_init_scale = dynamic_projections_init_scale
Esempio n. 4
0
 def __init__(self, mesh, shape, dtype, name=None):
   # An operation with no input tensors; name defaults to "indices".
   super(IndicesOperation, self).__init__([], mesh, name=name or "indices")
   self._mesh = mesh
   self._shape = [mtf.convert_to_dimension(d) for d in shape]
   self._dtype = dtype
   # The output carries one extra trailing "ndim" axis holding a
   # coordinate per input dimension.
   ndim_dim = mtf.Dimension("ndim", len(self._shape))
   output_shape = mtf.Shape(self._shape + [ndim_dim])
   self._outputs = [mtf.Tensor(self, output_shape, dtype)]
Esempio n. 5
0
  def __init__(self,  # pylint: disable=super-init-not-called
               heads_dims=(("heads", 12),),
               dropout_rate=0.0,
               relative_attention_type=None,
               relative_attention_num_buckets=32):
    """Create a GeneralBilinearSelfAttention Layer.

    Args:
      heads_dims: a list of mtf.Dimension or (name, size) pairs
      dropout_rate: a float
      relative_attention_type: an optional string - one of
        (None, "bias", "bias_shared", "contextual")
      relative_attention_num_buckets: an integer
    """
    # Accept mtf.Dimension objects or (name, size) pairs alike.
    self.heads_dims = list(map(mtf.convert_to_dimension, heads_dims))
    self.dropout_rate = dropout_rate
    self.relative_attention_type = relative_attention_type
    self.relative_attention_num_buckets = relative_attention_num_buckets
Esempio n. 6
0
 def testConvertToDimensionGenericInputs(self):
     """convert_to_dimension passes None through and rejects bare ints."""
     # Identity check: None in, None out.  assertIsNone is the unittest
     # idiom for this (the original used assertEqual(x, None)).
     self.assertIsNone(mtf.convert_to_dimension(None))
     # A bare integer carries no dimension name, so conversion must fail.
     with self.assertRaises(TypeError):
         mtf.convert_to_dimension(5)
Esempio n. 7
0
 def testConvertToDimension(self, inputs):
     """Every accepted input form converts to a dimension named "x" of size 5."""
     dim = mtf.convert_to_dimension(inputs)
     self.assertEqual(dim.size, 5)
     self.assertEqual(dim.name, "x")