# Run the sparse data update op, then fetch the sparse results and gradients
# in a single session call (with the dense grad-w computation switched on).
sess.run(sparse_data_update_op, feed_dict=fc.feed_dict())
sparse_result, sparse_input_grad, sparse_weight_grad, dense_grad_w = sess.run(
    sparse_fetches,
    feed_dict={
        lhs: lhs_values,
        compute_dense_grad_w: True
    })

# Check all the results:

# Convert the sparse gradient metainfo back to triplets and then use those row and col indices
# to index the dense reference weight gradient:
sparse_data = sparse.SparseRepresentation(fc.data.metainfo_state, sparse_weight_grad[0])
triplets = sparse.triplets_from_representation(fc.spec, sparse_data)
reference_grad_nzvalues = sparse.values_at_indices(
    triplets[0], triplets[1], reference_weight_grad[0])

# NOTE(review): `values` is not read anywhere in the visible code; the call is
# kept unchanged in case later code outside this view depends on it.
_, _, values = sparse.triplets_from_dense(reference_weight_grad[0])

# Convert the dense reference weight gradient to a sparse one using the same mask
# that we used for the weights so we can compare the nzvalues against the sparse grad.
# The representation must be built from the *reference* values sampled above:
# passing `*triplets` here would re-pack the sparse gradient's own values and
# make any later comparison of reference vs sparse gradient trivially pass.
# (Presumably representation_from_triplets re-orders the values into the same
# layout as sparse_weight_grad[0] - confirm against the sparse module.)
dense_data = sparse.representation_from_triplets(
    fc.spec, triplets[0], triplets[1], reference_grad_nzvalues)
reference_grad_nzvalues = dense_data.nz_values

# Need to set tolerances for fp32 as numpy is set for doubles by default:
rtol = 1e-05
atol = 1e-06

if not np.allclose(
        reference_result, sparse_result,
        rtol=rtol, atol=atol, equal_nan=True):
    print(f"Reference result:\n{reference_result}")
    print(f"Sparse result:\n{sparse_result}")
f"Max abs error: {np.max(np.abs(projections-reference_projections))}" ) raise RuntimeError("Sparse and reference projections do not match.") # Convert the sparse gradient metainfo back to triplets and then use those row and col indices # to index the dense reference weight gradient: matmul_spec = embedding.projection.weights.spec matmul_opts = embedding.projection.weights.matmul_options sparse_data = sparse.SparseRepresentation( embedding.projection.weights.get_metainfo(), tied_grad_w[0]) triplets = sparse.triplets_from_representation(matmul_spec, sparse_data, matmul_opts) # Reference grad is transposed with respect to popsparse one (third Jacobian is the reduction gradient wrt. weights): ref_grad_reduced = np.transpose(reference_grads_w) if args.block_size == 1: reference_grad_nzvalues = sparse.values_at_indices( triplets[0], triplets[1], ref_grad_reduced) else: reference_grad_nzvalues = sparse.blocks_at_indices( triplets[0], triplets[1], args.block_size, ref_grad_reduced) # Convert the dense reference weight gradient to a sparse one using the same mask # that we used for the weights so we can compare the nzvalues against the sparse grad: dense_data = sparse.representation_from_triplets(matmul_spec, triplets[0], triplets[1], reference_grad_nzvalues, matmul_opts) if logger.level == logging.getLevelName("DEBUG"): print(f"Tied grad-w triplets:\n{triplets}") print( f"Tied grad-w dense:\n{np.transpose(sparse.dense_from_triplets(matmul_spec, *triplets))}" )