コード例 #1
0
ファイル: test_sparse.py プロジェクト: muzzynine/examples-1
    def test_device_version_equality_ipu2(self):
        """Check that two ``ipu_version`` settings give equal representations.

        The same triplets are converted to a representation and back twice:
        once with ``ipu_version=0`` (labelled "from device" in the original
        comments) and once with ``ipu_version=2`` ("from version"). The
        metainfo, the non-zero values and the round-tripped triplets of both
        runs must be equal.
        """
        from ipu_sparse_ops import sparse

        # Expand a 4x3 block pattern into an element-wise mask by replacing
        # every entry with a block_edge x block_edge tile (Kronecker product).
        block_edge = 16
        block_pattern = np.array([[1, 0, 0], [0, 1, 0], [1, 1, 0], [0, 0, 1]])
        tile = np.ones(shape=[block_edge, block_edge])
        element_mask = np.kron(block_pattern, tile).astype(int)
        nnz = np.count_nonzero(element_mask)

        # Fill the masked positions with consecutive integers starting at 0.
        dense = np.zeros_like(element_mask)
        dense[np.nonzero(element_mask)] = np.arange(nnz)

        matmul_opts = {"metaInfoBucketOversizeProportion": 1}
        triplets = sparse.triplets_from_dense(dense)
        n_rows, n_cols = dense.shape
        spec = sparse.matmul_spec_from_max(
            n_cols, [2, n_rows],
            max_non_zeros=nnz,
            block_size=1,
            dtype=tf.float32)

        def round_trip(ipu_version):
            # Triplets -> representation -> triplets for one ipu_version.
            representation = sparse.representation_from_triplets(
                spec, *triplets, matmul_opts, ipu_version=ipu_version)
            recovered = sparse.triplets_from_representation(
                spec, representation, matmul_opts, ipu_version=ipu_version)
            return representation, recovered

        # from device
        device_r, device_t_rt = round_trip(0)
        # from version
        version_r, version_t_rt = round_trip(2)

        assert_equal(device_r.metainfo_state, version_r.metainfo_state)
        assert_equal(device_r.nz_values, version_r.nz_values)
        assert_equal(device_t_rt, version_t_rt)
コード例 #2
0
 def update_from_values(self,
                        values: List[float],
                        metainfo: List[float] = None):
     """Copy new data into the stored representation and rebuild the triplets.

     Args:
         values: Data copied into ``self.representation.nz_values``.
         metainfo: Optional data copied into
             ``self.representation.metainfo_state`` after its raw bytes are
             reinterpreted as uint16. It must provide ``tobytes()``.
             NOTE(review): the ``List[float]`` annotation does not offer
             ``tobytes()``; presumably a numpy array is expected - confirm.

     Side effects:
         Overwrites ``self.triplets`` with the triplets derived from the
         updated representation.
     """
     np.copyto(self.representation.nz_values, values)
     if metainfo is not None:
         # Bit-level reinterpretation, not a numeric conversion: the same
         # bytes are read as uint16 (the original comment says the metainfo
         # arrives typed as float16).
         raw_bytes = metainfo.tobytes()
         np.copyto(self.representation.metainfo_state,
                   np.frombuffer(raw_bytes, dtype=np.uint16))
     refreshed = sparse.triplets_from_representation(
         self.spec,
         self.representation,
         self.matmul_options,
         debug_name=self.name)
     self.triplets = refreshed
コード例 #3
0
 def extract_slot_triplets(self) -> Mapping[str, sparse.Triplets]:
     """Return the triplets of every slot variable, keyed by slot name.

     Each slot's ``np_variable`` is paired with the metainfo obtained from
     ``self.weights.get_metainfo()`` to form a sparse representation, which
     is then converted to triplets using the weights' spec and matmul
     options.
     """
     # Phase 1: wrap every slot's values in a representation.
     slot_reps = {}
     for slot_name, slot in self.get_slot_var_dict().items():
         slot_reps[slot_name] = sparse.SparseRepresentation(
             self.weights.get_metainfo(), slot.np_variable)

     # Phase 2: convert each representation back to triplets.
     slot_triplets = {}
     for slot_name, rep in slot_reps.items():
         slot_triplets[slot_name] = sparse.triplets_from_representation(
             self.weights.spec,
             rep,
             self.weights.matmul_options,
             debug_name=slot_name + "(slot)")
     return slot_triplets
コード例 #4
0
ファイル: test_sparse.py プロジェクト: muzzynine/examples-1
 def test_representation_round_trip_elements(self):
     """Round-trip an element-wise (block_size=1) matrix and compare.

     A dense matrix is converted to triplets, then to a representation,
     back to triplets and finally back to dense; the result must equal the
     matrix we started with.
     """
     from ipu_sparse_ops import sparse

     # Expand a 4x3 block pattern into an element-wise mask by replacing
     # every entry with a block_edge x block_edge tile (Kronecker product).
     block_edge = 16
     block_pattern = np.array([[1, 0, 0], [0, 1, 0], [1, 1, 0], [0, 0, 1]])
     tile = np.ones(shape=[block_edge, block_edge])
     element_mask = np.kron(block_pattern, tile).astype(int)
     nnz = np.count_nonzero(element_mask)

     # Fill the masked positions with consecutive integers starting at 0.
     dense = np.zeros_like(element_mask)
     dense[np.nonzero(element_mask)] = np.arange(nnz)

     matmul_opts = {"metaInfoBucketOversizeProportion": 1}
     triplets = sparse.triplets_from_dense(dense)
     n_rows, n_cols = dense.shape
     spec = sparse.matmul_spec_from_max(
         n_cols, [2, n_rows],
         max_non_zeros=nnz,
         block_size=1,
         dtype=tf.float32)

     # triplets -> representation -> triplets -> dense
     representation = sparse.representation_from_triplets(
         spec, *triplets, matmul_opts)
     recovered_triplets = sparse.triplets_from_representation(
         spec, representation, matmul_opts)
     recovered_dense = sparse.dense_from_triplets(spec, *recovered_triplets)
     assert_equal(dense, recovered_dense)
コード例 #5
0
ファイル: test_sparse.py プロジェクト: muzzynine/examples-1
    def test_representation_round_trip_blocks(self):
        """Round-trip block-sparse matrices for block sizes 4, 8 and 16.

        For each block size a random block-sparse matrix is converted to a
        representation and back, and the result must equal the original
        dense matrix. Re-extracting block triplets from the round-tripped
        matrix must also reproduce the original block triplets.
        """
        from ipu_sparse_ops import sparse

        for bs in (4, 8, 16):
            # Non-zero block structure of the matrix.
            block_pattern = np.array(
                [[1, 1, 0], [0, 1, 0], [1, 0, 0], [0, 0, 1]])
            block_count = np.count_nonzero(block_pattern)

            # Element-wise mask obtained by replacing every entry of the
            # block pattern with a bs x bs tile (Kronecker product).
            tile = np.ones(shape=[bs, bs])
            element_mask = np.kron(block_pattern, tile).astype(int)
            nnz = np.count_nonzero(element_mask)

            # Dense float32 matrix with random values at the masked positions.
            dense = np.zeros_like(element_mask, dtype=np.float32)
            random_values = np.random.rand(nnz)
            dense[np.nonzero(element_mask)] = random_values

            # Spec for the sparse matmul.
            matmul_opts = {"metaInfoBucketOversizeProportion": 1}
            n_rows, n_cols = dense.shape
            spec = sparse.matmul_spec_from_max(
                n_cols, [2, n_rows],
                max_non_zeros=block_count,
                block_size=bs,
                dtype=tf.float32)

            # Block indices come from the block pattern; the block values
            # are extracted from the dense matrix (this can't be done by
            # reshaping).
            index_triplets = sparse.triplets_from_dense(block_pattern)
            block_rows = index_triplets.row_indices
            block_cols = index_triplets.col_indices
            t_block = sparse.Triplets(
                block_rows, block_cols,
                sparse.blocks_at_indices(
                    index_triplets.row_indices, index_triplets.col_indices,
                    bs, dense))

            # Convert to the on-device representation and back, then check
            # the result is the dense matrix we started with.
            representation = sparse.representation_from_triplets(
                spec, *t_block, matmul_opts)
            recovered_triplets = sparse.triplets_from_representation(
                spec, representation, matmul_opts)
            recovered_dense = sparse.dense_from_triplets(
                spec, *recovered_triplets)
            assert_equal(dense, recovered_dense)

            # Triplets taken from the round-tripped dense matrix must match
            # the original block triplets.
            from_dense = sparse.triplets_from_dense(recovered_dense, bs)
            assert_equal(t_block.row_indices, from_dense.row_indices)
            assert_equal(t_block.col_indices, from_dense.col_indices)
            assert_equal(t_block.values, from_dense.values)
コード例 #6
0
def main(args):
    """Compare a sparse transformer against a dense one on the same inputs.

    The sparse model is built and run first, and its weights are written to
    a checkpoint via ``checkpointAsDense``. A dense model is then built, its
    trainable variables are restored from that checkpoint, and it is run on
    the same random activations. Output activations, input gradients and
    per-layer weight gradients are compared with
    ``np.testing.assert_allclose``.

    Args:
        args: Options object. This function reads ``random_seed``,
            ``batch_size``, ``source_sequence_length``, ``hidden_length``,
            ``profile`` and ``dtype``; the whole object is also passed to
            ``DynsparseTransformer`` and ``DenseTransformer``.

    Returns:
        The tuple ``(sparse_result, dense_result)`` of fetched results.

    Raises:
        AssertionError: From ``np.testing.assert_allclose`` when any
            compared pair differs beyond the tolerances.
    """
    tf.logging.set_verbosity(tf.logging.ERROR)
    np.set_printoptions(linewidth=200)
    random_seed = args.random_seed
    # NOTE(review): the directory created by mkdtemp() is not removed
    # anywhere in this function - confirm whether cleanup is wanted.
    checkpoint_path = os.path.join(tempfile.mkdtemp(), "model.ckpt")

    # Input activations for the attention layer
    random_gen = np.random.default_rng(seed=random_seed)
    activations_np = random_gen.uniform(-0.1,
                                        0.1,
                                        size=(args.batch_size,
                                              args.source_sequence_length,
                                              args.hidden_length))

    # Configure the IPU
    cfg = ipu.utils.create_ipu_config(profiling=args.profile,
                                      report_directory="./report/")
    cfg = ipu.utils.auto_select_ipus(cfg, 1)
    ipu.utils.configure_ipu_system(cfg)

    # Build IPU graphs
    sparse_decoder_graph = tf.Graph()
    sparse_transformer = DynsparseTransformer(args)
    with sparse_decoder_graph.as_default():
        with tf.device("cpu"):
            # placeholder for activations
            # weight placeholders are created inside sparse_transformer
            inputs_ph = tf.placeholder(args.dtype, activations_np.shape)
        with ipu.scopes.ipu_scope("/device:IPU:0"):
            sparse_decoder = partial(sparse_transformer_fwd_and_grad,
                                     sparse_transformer)
            sparse_decoder_fetches = ipu.ipu_compiler.compile(
                sparse_decoder, [inputs_ph])
            ipu.utils.move_variable_initialization_to_cpu()

    # sparse-decoder
    with tf.Session(graph=sparse_decoder_graph) as sess:
        # initialize weights
        sess.run(tf.global_variables_initializer())

        # Save the sparse weights to checkpoint as dense
        sparse_transformer.checkpointAsDense(checkpoint_path)

        # run sparse decoder
        sparse_result = sess.run(sparse_decoder_fetches,
                                 feed_dict={inputs_ph: activations_np})

    # Create a dense transformer and initialize the weights to the values that
    # the sparse model was initialized with originally
    dense_decoder_graph = tf.Graph()
    dense_transformer = DenseTransformer(args)
    with dense_decoder_graph.as_default():
        with tf.device("cpu"):
            # placeholder for activations
            # weights will get streamed from checkpoint
            inputs_ph = tf.placeholder(args.dtype, activations_np.shape)

        with ipu.scopes.ipu_scope("/device:IPU:0"):
            # Naming differs from the sparse section above: here
            # dense_decoder_fetches is the callable handed to the compiler
            # and dense_graph holds the compiled fetches.
            dense_decoder_fetches = partial(dense_transformer_fwd_and_grad,
                                            dense_transformer)
            dense_graph = ipu.ipu_compiler.compile(dense_decoder_fetches,
                                                   [inputs_ph])
            ipu.utils.move_variable_initialization_to_cpu()

        with tf.device("cpu"):
            # We will only load the trainable variables, not momentum etc.
            loader = tf.train.Saver(tf.trainable_variables())

    # dense-decoder
    with tf.Session(graph=dense_decoder_graph) as sess:
        # Initialize momentums, which are not part of the checkpoint
        sess.run(tf.global_variables_initializer())
        # Restore saved trainable variables
        loader.restore(sess, checkpoint_path)
        dense_result = sess.run(dense_graph,
                                feed_dict={inputs_ph: activations_np})

    # TEST
    # Default tolerances; looser values are used when running in float16.
    rtol = 1e-05
    atol = 1e-05
    if args.dtype == tf.float16:
        rtol = 1e-04
        atol = 1e-02
    # Compare model output activations (actual vs. desired) -> (sparse vs. dense)
    np.testing.assert_allclose(sparse_result["output_activation"],
                               dense_result["output_activation"],
                               atol=atol,
                               rtol=rtol,
                               err_msg="Output activations do not match.")

    # Compare gradient of output wrt. input
    np.testing.assert_allclose(sparse_result["input_grad"],
                               dense_result["input_grad"],
                               atol=atol,
                               rtol=rtol,
                               err_msg="Grads wrt. inputs do not match")

    # Compare the dense_w and sparse grads of every sparse layer
    for name, sparse_layer in sparse_transformer.sparse_layers.items():
        # Compare the dense grads
        dense_grad = dense_result[name + "/weight" + "_grad"]
        sparse_grad_w = sparse_result[name + "_grad_w"]
        np.testing.assert_allclose(
            sparse_grad_w,
            dense_grad,
            atol=atol,
            rtol=rtol,
            err_msg=f"Dense grads for layer {name} do not match")

        # Compare the sparse grads: pair the fetched nz-value gradient with
        # the layer's metainfo and convert it to (row, col, value) triplets.
        sparse_grad_padded = sparse_result[name +
                                           "/sparse_layer/nz_values_grad"]
        sparse_grad_data = sparse.SparseRepresentation(
            sparse_layer.weights.get_metainfo(), sparse_grad_padded)
        i, j, sparse_grad = sparse.triplets_from_representation(
            sparse_layer.weights.spec, sparse_grad_data,
            sparse_layer.weights.matmul_options)

        # Convert dense grads to blocks: view the 2-D gradient as an
        # (nx, ny, block_size, block_size) array by scaling the strides of
        # the two outer axes by block_size.
        block_size, _ = sparse_layer.get_nonzero_blocks_shape()
        nx, ny = dense_grad.shape[0] // block_size, dense_grad.shape[
            1] // block_size
        strides = np.array(dense_grad.strides)  # strides are in bytes
        strides = tuple(strides * block_size) + tuple(strides)
        blocked_dense_grad = np.lib.stride_tricks.as_strided(
            dense_grad, (nx, ny, block_size, block_size), strides)
        blocked_dense_grad = np.squeeze(
            np.copy(blocked_dense_grad
                    ))  # this will squeeze out the special case block size 1
        # Index the blocked dense grad at the sparse (row, col) positions.
        np.testing.assert_allclose(
            sparse_grad,
            blocked_dense_grad[i, j],
            atol=atol,
            rtol=rtol,
            err_msg=f"Sparse grads for layer {name} do not match")

    print("All results match.")
    return sparse_result, dense_result
コード例 #7
0
    sess.run(sparse_data_update_op, feed_dict=fc.feed_dict())
    sparse_result, sparse_input_grad, sparse_weight_grad, dense_grad_w = sess.run(
        sparse_fetches,
        feed_dict={
            lhs: lhs_values,
            compute_dense_grad_w: True
        })

# Check all the results:

# Convert the sparse gradient metainfo back to triplets and then use those row and col indices
# to index the dense reference weight gradient:
sparse_data = sparse.SparseRepresentation(fc.data.metainfo_state,
                                          sparse_weight_grad[0])
triplets = sparse.triplets_from_representation(fc.spec, sparse_data)
# NOTE(review): this value is overwritten a few lines below before any use
# that is visible in this excerpt.
reference_grad_nzvalues = sparse.values_at_indices(triplets[0], triplets[1],
                                                   reference_weight_grad[0])

# Convert the dense reference weight gradient to a sparse one using the same mask
# that we used for the weights so we can compare the nzvalues against the sparse grad:
# NOTE(review): `values` is not used in the visible code, and the
# representation below is rebuilt from `triplets`, which came from the sparse
# gradient rather than from the reference gradient. The resulting
# "reference" nz values therefore look derived from the sparse gradient
# itself - confirm whether the reference values were meant to be passed here.
_, _, values = sparse.triplets_from_dense(reference_weight_grad[0])
sparse_data = sparse.representation_from_triplets(fc.spec, *triplets)
reference_grad_nzvalues = sparse_data.nz_values

# Need to set tolerances for fp32 as numpy is set for doubles by default:
rtol = 1e-05
atol = 1e-06

if not np.allclose(
        reference_result, sparse_result, rtol=rtol, atol=atol, equal_nan=True):
コード例 #8
0
        sparse_result, sparse_input_grad, sparse_weight_grad, dense_grad_w = sess.run(
            sparse_fetches,
            feed_dict={
                lhs: lhs_values,
                compute_dense_grad_w: True
            })

    # Check all the results:

    # Convert the sparse gradient metainfo back to triplets and then use those row and col indices
    # to index the dense reference weight gradient:
    sparse_data = sparse.SparseRepresentation(fc.weights.get_metainfo(),
                                              sparse_weight_grad[0])
    triplets = sparse.triplets_from_representation(fc.weights.spec,
                                                   sparse_data,
                                                   fc.weights.matmul_options)
    if args.block_size == 1:
        reference_grad_nzvalues = sparse.values_at_indices(
            triplets[0], triplets[1], reference_weight_grad)
    else:
        reference_grad_nzvalues = sparse.blocks_at_indices(
            triplets[0], triplets[1], args.block_size, reference_weight_grad)
    # Convert the dense reference weight gradient to a sparse one using the same mask
    # that we used for the weights so we can compare the nzvalues against the sparse grad:
    dense_data = sparse.representation_from_triplets(fc.weights.spec,
                                                     triplets[0], triplets[1],
                                                     reference_grad_nzvalues,
                                                     fc.weights.matmul_options)

    # Set tolerances appropriately as numpy is set for doubles by default:
コード例 #9
0
ファイル: layers.py プロジェクト: shyamalschandra/examples
 def extract_momentum_triplets(self):
     """Return the momentum values as triplets.

     ``self.sparse_momentum`` is paired with the metainfo state of
     ``self.data`` to form a sparse representation, which is then converted
     to triplets using ``self.spec``.
     """
     metainfo = self.data.metainfo_state
     momentum_representation = sparse.SparseRepresentation(
         metainfo, self.sparse_momentum)
     momentum_triplets = sparse.triplets_from_representation(
         self.spec, momentum_representation)
     return momentum_triplets
コード例 #10
0
ファイル: layers.py プロジェクト: shyamalschandra/examples
 def extract_triplets(self):
     """Return the triplets obtained from ``self.data`` using ``self.spec``."""
     current_triplets = sparse.triplets_from_representation(
         self.spec, self.data)
     return current_triplets
コード例 #11
0
                       reference_projections,
                       rtol=rtol,
                       atol=atol,
                       equal_nan=True):
        print(
            f"Max abs error: {np.max(np.abs(projections-reference_projections))}"
        )
        raise RuntimeError("Sparse and reference projections do not match.")

    # Convert the sparse gradient metainfo back to triplets and then use those row and col indices
    # to index the dense reference weight gradient:
    matmul_spec = embedding.projection.weights.spec
    matmul_opts = embedding.projection.weights.matmul_options
    sparse_data = sparse.SparseRepresentation(
        embedding.projection.weights.get_metainfo(), tied_grad_w[0])
    triplets = sparse.triplets_from_representation(matmul_spec, sparse_data,
                                                   matmul_opts)
    # Reference grad is transposed with respect to popsparse one (third Jacobian is the reduction gradient wrt. weights):
    ref_grad_reduced = np.transpose(reference_grads_w)
    if args.block_size == 1:
        reference_grad_nzvalues = sparse.values_at_indices(
            triplets[0], triplets[1], ref_grad_reduced)
    else:
        reference_grad_nzvalues = sparse.blocks_at_indices(
            triplets[0], triplets[1], args.block_size, ref_grad_reduced)
    # Convert the dense reference weight gradient to a sparse one using the same mask
    # that we used for the weights so we can compare the nzvalues against the sparse grad:
    dense_data = sparse.representation_from_triplets(matmul_spec, triplets[0],
                                                     triplets[1],
                                                     reference_grad_nzvalues,
                                                     matmul_opts)