def test_block_conversions(self):
    """Check that block-sparse dense/mask/triplet conversions agree.

    A 4x4 matrix containing two non-zero 2x2 blocks is described both as a
    dense array and as block triplets; the conversions in
    ``ipu_sparse_ops.sparse`` must map each form onto the other.
    """
    from ipu_sparse_ops import sparse

    block_size = 2
    # [[1, 2], [3, 4]] is placed in block-column 0 of both block-rows, then
    # 4 is added to every element of the lower-left block, so the two
    # non-zero blocks are [[1, 2], [3, 4]] and [[5, 6], [7, 8]].
    base = np.kron([[1, 0], [1, 0]], [[1, 2], [3, 4]])
    offset = np.kron([[0, 0], [1, 0]], [[4, 4], [4, 4]])
    dense = base + offset

    spec = sparse.matmul_spec_from_max(
        2 * block_size, [1, 2 * block_size], 2,
        block_size=block_size, dtype=tf.float32)
    block_values = np.reshape(
        [1, 2, 3, 4, 5, 6, 7, 8], [2, block_size, block_size])
    t = ([0, 1], [0, 0], block_values)

    # Triplets -> dense must reproduce the hand-built matrix.
    assert_equal(dense, sparse.dense_from_triplets(spec, *t))

    # The mask derived from the dense matrix and the mask derived from the
    # triplets must be the same.
    expected_mask = (dense != 0).astype(dense.dtype)
    assert_equal(expected_mask, sparse.mask_from_triplets(spec, *t))

    # Dense -> triplets must give back the triplets we started from.
    td = sparse.triplets_from_dense(dense, block_size)
    recovered = (td.row_indices, td.col_indices, td.values)
    for expected, actual in zip(t, recovered):
        assert_equal(expected, actual)
def create_sparse_layers(opts):
    """Build two sparse FC layers that share weights but differ in pooling.

    The sparsity pattern is an identity block mask (cropped to the layer's
    block dimensions) with two extra non-zero blocks at (1, 3) and (0, 3);
    the non-zero elements are filled with uniform random values.

    Args:
        opts: Options object. Reads ``meta_info_oversize``, ``input_size``,
            ``output_size``, ``block_size``, ``dtype``, ``batchsize`` and
            ``pooling_type``.

    Returns:
        Tuple ``(fc, fc_pool)``: a layer built with ``pooling_type='NONE'``
        and one built with ``opts.pooling_type``, both from the same triplets.
    """
    matmul_opts = {"metaInfoBucketOversizeProportion": opts.meta_info_oversize}

    in_blocks = opts.input_size // opts.block_size
    out_blocks = opts.output_size // opts.block_size
    identity_size = max(in_blocks, out_blocks)
    block_mask = np.identity(identity_size)[0:in_blocks, 0:out_blocks]
    # Two off-diagonal blocks; this requires at least 2 block-rows and
    # 4 block-columns, otherwise NumPy raises IndexError here.
    block_mask[1, 3] = 1
    block_mask[0, 3] = 1

    el_mask = sparse.block_mask_to_element(block_mask, opts.block_size)
    n_els = np.count_nonzero(el_mask)
    masked_rhs = np.zeros_like(
        el_mask, dtype=np.float32 if opts.dtype == "fp32" else np.float16)
    masked_rhs[np.nonzero(el_mask)] = np.random.rand(n_els)

    if opts.block_size == 1:
        triplets = sparse.triplets_from_dense(masked_rhs)
    else:
        # Indices come from the block mask; the values are the matching
        # blocks cut out of the element-wise matrix.
        block_indices = sparse.triplets_from_dense(block_mask)
        triplets = sparse.Triplets(
            block_indices.row_indices, block_indices.col_indices,
            sparse.blocks_at_indices(
                block_indices.row_indices, block_indices.col_indices,
                opts.block_size, masked_rhs))

    # NOTE(review): `dtype` is not bound inside this function; presumably it
    # is a module-level name defined elsewhere in the file - confirm.
    shared_kwargs = dict(
        matmul_options=matmul_opts, dtype=dtype, use_bias=False, relu=False)
    input_shape = [opts.batchsize, opts.input_size]

    fc = layers.SparseFcLayer.from_triplets(
        opts.output_size, input_shape, *triplets,
        name="fc_None", pooling_type='NONE', **shared_kwargs)
    fc_pool = layers.SparseFcLayer.from_triplets(
        opts.output_size, input_shape, *triplets,
        name="fc_" + opts.pooling_type, pooling_type=opts.pooling_type,
        **shared_kwargs)
    return fc, fc_pool
def test_conversions(self):
    """Check element-wise conversions between dense, triplets and mask."""
    from ipu_sparse_ops import sparse

    matrix = np.diag([10, 20])

    # Dense -> triplets: one entry per diagonal element.
    t = sparse.triplets_from_dense(matrix)
    expected_triplets = ([0, 1], [0, 1], [10, 20])
    for position, expected in enumerate(expected_triplets):
        assert_equal(t[position], expected)

    # NOTE(review): the dtype is passed positionally here, whereas other
    # callers pass block_size/dtype by keyword - confirm against the
    # signature of matmul_spec_from_max in use.
    spec = sparse.matmul_spec_from_max(2, [1, 2], 2, tf.float32)

    # Triplets -> dense gives back the original matrix.
    assert_equal(sparse.dense_from_triplets(spec, *t), matrix)

    # Triplets -> mask marks exactly the non-zero positions.
    assert_equal(sparse.mask_from_triplets(spec, *t), np.eye(2, dtype=int))
def test_representation_round_trip_blocks(self):
    """Round-trip block triplets through the device representation.

    For block sizes 4, 8 and 16: build a random block-sparse matrix, convert
    its block triplets to the on-device representation and back, and check
    that both the dense matrix and the triplets are recovered.
    """
    from ipu_sparse_ops import sparse

    for block_size in (4, 8, 16):
        # Non-zero block structure of the matrix:
        block_mask = np.array([[1, 1, 0], [0, 1, 0], [1, 0, 0], [0, 0, 1]])
        num_blocks = np.count_nonzero(block_mask)

        # Expand each block entry to block_size x block_size elements with
        # a Kronecker product to get the element-wise mask:
        el_mask = np.kron(
            block_mask, np.ones(shape=[block_size, block_size])).astype(int)
        num_elements = np.count_nonzero(el_mask)

        # Dense matrix with random values wherever the mask is set:
        dense = np.zeros_like(el_mask, dtype=np.float32)
        dense[np.nonzero(el_mask)] = np.random.rand(num_elements)

        # Spec for the sparse matmul:
        opts = {"metaInfoBucketOversizeProportion": 1}
        spec = sparse.matmul_spec_from_max(
            dense.shape[1], [2, dense.shape[0]],
            max_non_zeros=num_blocks, block_size=block_size,
            dtype=tf.float32)

        # Block indices come from the block mask; the values have to be
        # extracted block by block from the dense matrix (a plain reshape
        # would not produce the right layout):
        block_indices = sparse.triplets_from_dense(block_mask)
        t_block = sparse.Triplets(
            block_indices.row_indices, block_indices.col_indices,
            sparse.blocks_at_indices(
                block_indices.row_indices, block_indices.col_indices,
                block_size, dense))

        # To the on-device representation and back; the result must be the
        # dense matrix we started with:
        representation = sparse.representation_from_triplets(
            spec, *t_block, opts)
        t_round_trip = sparse.triplets_from_representation(
            spec, representation, opts)
        dense_round_trip = sparse.dense_from_triplets(spec, *t_round_trip)
        assert_equal(dense, dense_round_trip)

        # Extracting triplets from the round-tripped dense matrix must
        # return the original block triplets:
        td = sparse.triplets_from_dense(dense_round_trip, block_size)
        assert_equal(t_block.row_indices, td.row_indices)
        assert_equal(t_block.col_indices, td.col_indices)
        assert_equal(t_block.values, td.values)
def create_sparse_fc_layer(hidden_size, input_shape, name='fc'):
    """Build a sparse FC layer from the masked weights.

    NOTE(review): ``hidden_size``, ``input_shape`` and ``name`` are not used;
    sizes, dtype and weights are taken from ``opts`` and ``fc_weights`` in
    the enclosing scope - confirm this is intended.

    Returns:
        Tuple ``(fc, masked_weights)``: the layer and the dense masked
        weights its triplets were extracted from.
    """
    masked_weights = make_fc_weights(
        opts.input_size, opts.output_size, fc_weights)

    if opts.dtype == 'fp32':
        layer_dtype = tf.float32
    else:
        layer_dtype = tf.float16

    # The non-zero entries of the masked weights define the layer.
    weight_triplets = sparse.triplets_from_dense(masked_weights)
    fc = layers.SparseFcLayer.from_triplets(
        opts.output_size,
        [opts.batchsize, opts.input_size],
        *weight_triplets,
        matmul_options={"metaInfoBucketOversizeProportion": 1},
        name='sparse_fc_from_triplets',
        dtype=layer_dtype,
        bias=False,
        relu=False)
    return fc, masked_weights
def test_regrow_rigl(self):
    """Check that regrow_rigl picks the coordinates of the largest grads."""
    from ipu_sparse_ops import sparse, sparse_training

    weights = np.array([[0.1, 0.2],
                        [0.3, 0.4]])
    grads = np.array([
        [1, 1, 1, 1, 1000],   # largest grad in this row is at (0, 4)
        [1, 1, 1000, 1, 1],   # largest grad in this row is at (1, 2)
    ])

    current = sparse.triplets_from_dense(weights)
    regrown = sparse_training.regrow_rigl(
        current, grads, sparse_training.zero_values_generator,
        2, True, "test")

    # The two new entries sit at (0, 4) and (1, 2) and carry the value 0
    # produced by the zero-values generator.
    expected = (
        [0, 1],   # row indices
        [4, 2],   # col indices
        [0, 0],   # values
    )
    for position, want in enumerate(expected):
        assert_equal(regrown[position], want)
def test_regrow_rigl_zero_grad(self):
    """Check regrow_rigl when most candidate gradients are zero."""
    from ipu_sparse_ops import sparse, sparse_training

    weights = np.array([[0.1, 0.2],
                        [0.3, 0.4]])
    grads = np.array([
        [1, 1, 0, 0.1, 0],   # largest grad in this row is at (0, 3)
        [1, 1, 0, 0, 0],
    ])

    current = sparse.triplets_from_dense(weights)
    t = sparse_training.regrow_rigl(
        current, grads, sparse_training.zero_values_generator,
        2, True, "test")
    print(t)

    # regrow_rigl does not use a stable sort, so the position of the second
    # regrown entry is not guaranteed; only the first one is checked.
    first_row, first_col = t[0][0], t[1][0]
    assert first_row == 0
    assert first_col == 3
    assert_equal(t[2], [0, 0])  # New values are 0 from the generator
def make_triplets_test_inputs(args):
    """Create a sparse FC layer and the dense weights it represents.

    The layer is built according to ``args.pattern``:

    * ``'fixed'``: identity pattern plus entries at (1, 3) and (0, 7),
      filled with uniform random values.
    * ``'random_sign_ones'``: random layer whose values come from
      ``random_sign_ones_generator``.
    * ``'random_orthogonal'``: layer from the random orthonormal generator.
    * anything else: random layer with standard-normal values.

    Args:
        args: Options object. Reads ``input_size``, ``output_size``,
            ``batch_size``, ``data_type``, ``pattern`` and (for the random
            patterns) ``density``.

    Returns:
        Tuple ``(fc, masked_rhs)`` where ``masked_rhs`` is the dense weight
        matrix cast to the NumPy dtype matching the weights type.
    """
    input_size = args.input_size
    output_size = args.output_size
    weights_type = tf.float16 if args.data_type == 'fp16' else tf.float32
    # The same matmul options are used whichever pattern is selected.
    matmul_opts = {"metaInfoBucketOversizeProportion": 0.1}
    layer_input_shape = [args.batch_size, args.input_size]

    if args.pattern == 'fixed':
        rhs_values = np.random.rand(input_size, output_size)
        sparse_mask = np.identity(input_size)
        sparse_mask[1, 3] = 1
        sparse_mask[0, 7] = 1
        masked_rhs = np.multiply(sparse_mask[:, 0:output_size], rhs_values)
        triplets = sparse.triplets_from_dense(masked_rhs)
        fc = layers.SparseFcLayer.from_triplets(
            args.output_size, layer_input_shape, *triplets,
            matmul_options=matmul_opts,
            name='sparse_fc_from_triplets',
            dtype=weights_type,
            bias=False, relu=False)
    elif args.pattern == 'random_sign_ones':
        indices_random_gen = np.random.default_rng(seed=random_seed)
        # No dtype is passed for this pattern, so the layer default applies.
        fc = layers.SparseFcLayer.from_random_generator(
            args.output_size, layer_input_shape, args.density,
            random_sign_ones_generator, indices_random_gen,
            matmul_options=matmul_opts,
            name='sparse_fc_from_random_sign_ones',
            bias=False, relu=False)
        masked_rhs = sparse.dense_from_triplets(
            fc.weights.spec, *fc.weights.triplets)
    elif args.pattern == "random_orthogonal":
        fc = layers.SparseFcLayer.from_random_orthonormal_generator(
            args.output_size, layer_input_shape, args.density,
            matmul_options=matmul_opts,
            name='sparse_fc_from_random_orthogonal',
            dtype=weights_type,
            bias=False, relu=False)
        masked_rhs = sparse.dense_from_triplets(
            fc.weights.spec, *fc.weights.triplets)
    else:
        # Values and indices use separate generators with the same seed.
        random_gen = np.random.default_rng(seed=random_seed)
        indices_random_gen = np.random.default_rng(seed=random_seed)
        fc = layers.SparseFcLayer.from_random_generator(
            args.output_size, layer_input_shape, args.density,
            random_gen.standard_normal, indices_random_gen,
            matmul_options=matmul_opts,
            name='sparse_fc_from_random',
            dtype=weights_type,
            bias=False, relu=False)
        masked_rhs = sparse.dense_from_triplets(
            fc.weights.spec, *fc.weights.triplets)

    return fc, masked_rhs.astype(weights_type.as_numpy_dtype())
def test_representation_round_trip_elements(self):
    """Round-trip element-wise triplets through the device representation.

    The matrix has a block structure (16x16 blocks) but is converted with
    ``block_size=1``, i.e. every non-zero element is its own triplet.
    """
    from ipu_sparse_ops import sparse

    block_size = 16
    block_mask = np.array([[1, 0, 0], [0, 1, 0], [1, 1, 0], [0, 0, 1]])
    el_mask = np.kron(
        block_mask, np.ones(shape=[block_size, block_size])).astype(int)
    num_elements = np.count_nonzero(el_mask)

    # Fill the masked positions with 0, 1, 2, ... in row-major order.
    dense = np.zeros_like(el_mask)
    dense[np.nonzero(el_mask)] = np.arange(num_elements)

    opts = {"metaInfoBucketOversizeProportion": 1}
    t = sparse.triplets_from_dense(dense)
    spec = sparse.matmul_spec_from_max(
        dense.shape[1], [2, dense.shape[0]],
        max_non_zeros=num_elements, block_size=1, dtype=tf.float32)

    # Triplets -> device representation -> triplets -> dense.
    representation = sparse.representation_from_triplets(spec, *t, opts)
    t_round_trip = sparse.triplets_from_representation(
        spec, representation, opts)
    dense_round_trip = sparse.dense_from_triplets(spec, *t_round_trip)
    assert_equal(dense, dense_round_trip)
def make_embedding_layer_and_test_inputs(args):
    """Create a sparse tied-embedding layer with its weights and token ids.

    Only ``args.pattern == 'fixed'`` is supported.

    Args:
        args: Options object. Reads ``embedding_size``, ``hidden_size``,
            ``batch_size``, ``data_type``, ``meta_info_oversize``,
            ``pattern``, ``block_size``, ``sequence_size`` and
            ``pooling_type``.

    Returns:
        Tuple ``(layer, embedding_weights, token_ids)``.

    Raises:
        RuntimeError: If ``args.pattern`` is not ``'fixed'``.
    """
    input_size = args.embedding_size
    output_size = args.hidden_size
    batch_size = args.batch_size
    if args.data_type == 'fp16':
        weights_type = tf.float16
    else:
        weights_type = tf.float32
    matmul_opts = {"metaInfoBucketOversizeProportion": args.meta_info_oversize}

    if args.pattern != 'fixed':
        raise RuntimeError("Invalid generator")

    in_blocks = input_size // args.block_size
    out_blocks = output_size // args.block_size
    identity_size = max(in_blocks, out_blocks)
    block_mask = np.identity(identity_size)[0:in_blocks, 0:out_blocks]
    # One non-zero block per block-row, wrapping around the block-columns.
    n_blocks = in_blocks
    for block_row in range(n_blocks):
        block_mask[block_row, block_row % out_blocks] = 1
    assert n_blocks == np.count_nonzero(block_mask)

    el_mask = sparse.block_mask_to_element(block_mask, args.block_size)
    n_els = np.count_nonzero(el_mask)
    embedding_weights = np.zeros_like(el_mask, dtype=np.float32)
    embedding_weights[np.nonzero(el_mask)] = np.random.rand(n_els)

    # Shuffle the candidate ids and keep the first sequence_size of them.
    shuffled_ids = np.arange(n_els).astype(np.uint32)
    np.random.shuffle(shuffled_ids)
    token_ids = shuffled_ids[0:args.sequence_size]

    # The sparse tied projection maps from the hidden size back onto the
    # embedding size, so its weight matrix is the transpose of the
    # embedding matrix built above.
    projection_triplets = sparse.triplets_from_dense(
        np.transpose(embedding_weights), args.block_size)
    projection = layers.SparseFcLayer.from_triplets(
        args.embedding_size, [args.sequence_size, args.hidden_size],
        *projection_triplets,
        matmul_options=matmul_opts,
        name='sparse_projection_from_triplets',
        dtype=weights_type,
        use_bias=False, relu=False,
        pooling_type=args.pooling_type)

    # The embedding layer is created from the projection, which ties the
    # two together.
    layer = layers.SparseTiedEmbedding.from_sparse_projection(
        "tied_embedding", projection)
    return layer, embedding_weights, token_ids
def test_device_version_equality_ipu2(self):
    """Compare conversions done with ipu_version=0 and ipu_version=2.

    The same triplets are converted to the device representation and back
    with both settings; metainfo, non-zero values and the round-tripped
    triplets must be equal.
    """
    from ipu_sparse_ops import sparse

    block_size = 16
    block_mask = np.array([[1, 0, 0], [0, 1, 0], [1, 1, 0], [0, 0, 1]])
    el_mask = np.kron(
        block_mask, np.ones(shape=[block_size, block_size])).astype(int)
    num_elements = np.count_nonzero(el_mask)

    dense = np.zeros_like(el_mask)
    dense[np.nonzero(el_mask)] = np.arange(num_elements)

    opts = {"metaInfoBucketOversizeProportion": 1}
    t = sparse.triplets_from_dense(dense)
    spec = sparse.matmul_spec_from_max(
        dense.shape[1], [2, dense.shape[0]],
        max_non_zeros=num_elements, block_size=1, dtype=tf.float32)

    def round_trip(ipu_version):
        # Triplets -> representation -> triplets for one ipu_version.
        representation = sparse.representation_from_triplets(
            spec, *t, opts, ipu_version=ipu_version)
        triplets_back = sparse.triplets_from_representation(
            spec, representation, opts, ipu_version=ipu_version)
        return representation, triplets_back

    # ipu_version=0 is the "from device" case, ipu_version=2 the explicit
    # version.
    device_r, device_t_rt = round_trip(0)
    version_r, version_t_rt = round_trip(2)

    assert_equal(device_r.metainfo_state, version_r.metainfo_state)
    assert_equal(device_r.nz_values, version_r.nz_values)
    assert_equal(device_t_rt, version_t_rt)
def make_triplets_test_inputs(args, data_type):
    """Create a sparse FC layer and the dense weights it represents.

    Args:
        args: Options object. Reads ``input_size``, ``output_size``,
            ``batch_size``, ``pattern`` and (for the random pattern)
            ``density``.
        data_type: Not used by this function; kept so existing callers
            keep working.

    Returns:
        Tuple ``(fc, masked_rhs)``: the layer and its dense weight matrix.
    """
    input_size = args.input_size
    output_size = args.output_size
    topk_ratio = 0.5
    layer_input_shape = [args.batch_size, args.input_size]

    if args.pattern == 'fixed':
        # Identity pattern plus two extra entries, with random values.
        rhs_values = np.random.rand(input_size, output_size)
        sparse_mask = np.identity(input_size)
        sparse_mask[1, 3] = 1
        sparse_mask[0, 7] = 1
        masked_rhs = np.multiply(sparse_mask[:, 0:output_size], rhs_values)
        triplets = sparse.triplets_from_dense(masked_rhs)
        fc = layers.SparseFcLayer.from_triplets(
            args.output_size, layer_input_shape, topk_ratio, *triplets)
    else:
        random_gen = np.random.default_rng(seed=random_seed)
        fc = layers.SparseFcLayer.from_random_generator(
            args.output_size, layer_input_shape, args.density,
            topk_ratio, random_gen.standard_normal, random_seed)
        # The pattern was generated inside the layer, so the dense weights
        # are rebuilt from the layer's triplets.
        masked_rhs = sparse.dense_from_triplets(fc.spec, *fc.triplets)

    return fc, masked_rhs
compute_dense_grad_w: True }) # Check all the results: # Convert the sparse gradient metainfo back to triplets and then use those row and col indices # to index the dense reference weight gradient: sparse_data = sparse.SparseRepresentation(fc.data.metainfo_state, sparse_weight_grad[0]) triplets = sparse.triplets_from_representation(fc.spec, sparse_data) reference_grad_nzvalues = sparse.values_at_indices(triplets[0], triplets[1], reference_weight_grad[0]) # Convert the dense reference weight gradient to a sparse one using the same mask # that we used for the weights so we can compare the nzvalues against the sparse grad: _, _, values = sparse.triplets_from_dense(reference_weight_grad[0]) sparse_data = sparse.representation_from_triplets(fc.spec, *triplets) reference_grad_nzvalues = sparse_data.nz_values # Need to set tolerances for fp32 as numpy is set for doubles by default: rtol = 1e-05 atol = 1e-06 if not np.allclose( reference_result, sparse_result, rtol=rtol, atol=atol, equal_nan=True): print(f"Reference result:\n{reference_result}") print(f"Sparse result:\n{sparse_result}") diff = reference_result - sparse_result print(f"Difference:\n{diff}") diff_triplet = sparse.triplets_from_dense(diff) print(
def make_fc_layer_and_test_inputs(args):
    """Create a sparse FC layer and the dense weights it represents.

    The layer is built according to ``args.pattern``:

    * ``'fixed'``: identity block pattern (cropped to the layer's block
      dimensions) plus blocks at (1, 3) and (0, 7), with random values.
    * ``'random_sign_ones'``: random layer whose values come from
      ``random_sign_ones_generator``.
    * ``'random_orthogonal'``: matrix from
      ``sparse.gen_sparse_rand_orthog_mat`` (square layers only).
    * anything else: random layer with standard-normal values.

    Args:
        args: Options object. Reads ``input_size``, ``output_size``,
            ``batch_size``, ``data_type``, ``meta_info_oversize``,
            ``pattern``, ``block_size``, ``pooling_type`` and (for the
            random patterns) ``density``.

    Returns:
        Tuple ``(fc, masked_rhs)`` where ``masked_rhs`` is the dense weight
        matrix cast to the NumPy dtype matching the weights type.

    Raises:
        ValueError: If the pattern is ``'random_orthogonal'`` and the input
            and output sizes differ.
    """
    input_size = args.input_size
    output_size = args.output_size
    weights_type = tf.float16 if args.data_type == 'fp16' else tf.float32
    matmul_opts = {"metaInfoBucketOversizeProportion": args.meta_info_oversize}

    if args.pattern == 'fixed':
        in_blocks = input_size // args.block_size
        out_blocks = output_size // args.block_size
        identity_size = max(in_blocks, out_blocks)
        block_mask = np.identity(identity_size)[0:in_blocks, 0:out_blocks]
        block_mask[1, 3] = 1
        block_mask[0, 7] = 1

        el_mask = sparse.block_mask_to_element(block_mask, args.block_size)
        n_els = np.count_nonzero(el_mask)
        masked_rhs = np.zeros_like(el_mask, dtype=np.float32)
        masked_rhs[np.nonzero(el_mask)] = np.random.rand(n_els)

        if args.block_size == 1:
            triplets = sparse.triplets_from_dense(masked_rhs)
        else:
            # Indices come from the block mask; the values are the matching
            # blocks cut out of the element-wise matrix.
            block_indices = sparse.triplets_from_dense(block_mask)
            triplets = sparse.Triplets(
                block_indices.row_indices, block_indices.col_indices,
                sparse.blocks_at_indices(
                    block_indices.row_indices, block_indices.col_indices,
                    args.block_size, masked_rhs))
        fc = layers.SparseFcLayer.from_triplets(
            args.output_size, [args.batch_size, args.input_size], *triplets,
            matmul_options=matmul_opts,
            name='sparse_fc_from_triplets',
            dtype=weights_type,
            use_bias=False, relu=False,
            pooling_type=args.pooling_type)
    elif args.pattern == 'random_sign_ones':
        indices_random_gen = np.random.default_rng(seed=random_seed)
        # No dtype is passed for this pattern, so the layer default applies.
        fc = layers.SparseFcLayer.from_random_generator(
            args.output_size, [args.batch_size, args.input_size],
            args.density, args.block_size,
            random_sign_ones_generator, indices_random_gen,
            matmul_options=matmul_opts,
            name='sparse_fc_from_random_sign_ones',
            use_bias=False, relu=False,
            pooling_type=args.pooling_type)
        masked_rhs = sparse.dense_from_triplets(
            fc.weights.spec, *fc.weights.triplets)
    elif args.pattern == "random_orthogonal":
        if args.input_size != args.output_size:
            raise ValueError(
                "random_orthogonal pattern requires square matrix")
        # The second return value is not needed here.
        matrix, _ = sparse.gen_sparse_rand_orthog_mat(
            args.output_size, args.density, args.block_size)
        triplets = sparse.triplets_from_dense(matrix, args.block_size)
        fc = layers.SparseFcLayer.from_triplets(
            args.output_size, [args.batch_size, args.input_size], *triplets,
            matmul_options=matmul_opts,
            name='sparse_fc_random_orthogonal',
            dtype=weights_type,
            use_bias=False, relu=False,
            pooling_type=args.pooling_type)
        masked_rhs = sparse.dense_from_triplets(
            fc.weights.spec, *fc.weights.triplets)
    else:
        # Values and indices use separate generators with the same seed.
        random_gen = np.random.default_rng(seed=random_seed)
        indices_random_gen = np.random.default_rng(seed=random_seed)
        fc = layers.SparseFcLayer.from_random_generator(
            args.output_size, [args.batch_size, args.input_size],
            args.density, args.block_size,
            random_gen.standard_normal, indices_random_gen,
            matmul_options=matmul_opts,
            name='sparse_fc_from_random',
            dtype=weights_type,
            use_bias=False, relu=False,
            pooling_type=args.pooling_type)
        masked_rhs = fc.weights.extract_dense()

    return fc, masked_rhs.astype(weights_type.as_numpy_dtype())
atol = 1e-02 elif args.pattern == 'random_orthogonal': rtol = 1e-07 atol = 1e-06 else: rtol = 1e-05 atol = 1e-06 if not np.allclose( reference_result, sparse_result, rtol=rtol, atol=atol, equal_nan=True): print(f"Reference result:\n{reference_result}") print(f"Sparse result:\n{sparse_result}") diff = reference_result - sparse_result print(f"Difference:\n{diff}") diff_triplet = sparse.triplets_from_dense(diff) print( f"Difference triplets:\nrows: {diff_triplet[0]}\ncols: {diff_triplet[1]}\nvalues: {diff_triplet[2]}" ) raise RuntimeError("Sparse and reference results do not match.") if not np.allclose(reference_input_grad, sparse_input_grad, rtol=rtol, atol=atol, equal_nan=True): raise RuntimeError( "Sparse and reference input gradients do not match.") if not np.allclose(dense_data.nz_values, sparse_weight_grad,
fc_weights = np.random.rand(10) # Create a first graph and run it to retrieve the weights from the ipu. Then create a checkpoint graph, outfeed_queue, fc, x_fc, test_op, upload_sparse, dequeue = make_graph( fc_weights=fc_weights) with tf.Session(graph=graph) as sess: # init sess.run(tf.global_variables_initializer()) # run and outfeed weights sess.run(test_op, feed_dict={x_fc: x_fc_in}) results_1 = sess.run(dequeue) # Update position of the nz only new_triplets = sparse.triplets_from_dense( make_fc_weights(opts.input_size, opts.output_size, fc_weights)) fc.update_triplets(new_triplets) sparse_feed = fc.feed_dict() sess.run(upload_sparse, sparse_feed) sess.run(test_op, feed_dict={x_fc: x_fc_in}) results_2 = sess.run(dequeue) # update all weights fc_weights = np.random.rand(10) new_triplets_2 = sparse.triplets_from_dense( make_fc_weights(opts.input_size, opts.output_size, fc_weights)) fc.update_triplets(new_triplets_2) sparse_feed = fc.feed_dict() sess.run(upload_sparse, sparse_feed)