# Assumed context: these tests live in a test class alongside torch, numpy (np),
# TTEmbeddingBag, OptimType, tt_matrix_to_full and the generate_sparse_feature
# helper imported/defined in the surrounding test module.
def test_backward_dense(self, batch_size, pooling_factor,
                        pooling_factor_std, tt_ndims):
     device = torch.device("cuda:0")
     torch.cuda.set_device(device)
     tt_p_shapes = [7, 9, 11, 5]
     tt_q_shapes = [3, 4, 5, 7]
     tt_ranks = [13, 12, 7]
     tt_p_shapes = tt_p_shapes[:tt_ndims]
     tt_q_shapes = tt_q_shapes[:tt_ndims]
     tt_ranks = tt_ranks[:(tt_ndims - 1)]
     num_embeddings = np.prod(np.array(tt_p_shapes))
     embedding_dim = np.prod(np.array(tt_q_shapes))
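     # num_embeddings = prod(tt_p_shapes) and embedding_dim = prod(tt_q_shapes),
     # e.g. for tt_ndims == 3 this is 7 * 9 * 11 = 693 rows and 3 * 4 * 5 = 60 columns.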
     _, indices, offsets, _ = generate_sparse_feature(
         batch_size,
         num_embeddings=num_embeddings,
         pooling_factor=float(pooling_factor),
         pooling_factor_std=float(pooling_factor_std),
         generate_scores=False,
         unary=False,
         unique=False,
     )
     # create TT-Embedding op
     offsets = torch.tensor(offsets, dtype=torch.int64, device=device)
     indices = torch.tensor(indices, dtype=torch.int64, device=device)
     tt_emb = TTEmbeddingBag(
         num_embeddings=num_embeddings,
         embedding_dim=embedding_dim,
         tt_p_shapes=tt_p_shapes,
         tt_q_shapes=tt_q_shapes,
         tt_ranks=tt_ranks,
         sparse=False,
         weight_dist="uniform",
     )
     tt_emb.to(device)
     emb = torch.nn.EmbeddingBag(
         num_embeddings,
         embedding_dim,
         sparse=True,
         mode="sum",
         _weight=tt_emb.full_weight(),
         include_last_offset=True,
     )
     emb.to(device)
     d_output = torch.rand(batch_size, embedding_dim, device=device) * 0.1
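     # Detached copies of the cores give an independent autograd path: the dense
     # weight rebuilt from them below lets the reference gradient be chained back
     # onto the cores and compared with the gradients tt_emb computes itself.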
     tt_cores = [
         tt.clone().detach().requires_grad_(True) for tt in tt_emb.tt_cores
     ]
     full_weight = tt_matrix_to_full(tt_p_shapes, tt_q_shapes, tt_ranks,
                                     tt_cores, [1, 0, 2, 3])
     # tt_emb: forward/backward through the TT-embedding op
     output = tt_emb(indices, offsets)
     output.backward(d_output)
     # reference: dense EmbeddingBag forward/backward, then chain the weight
     # gradient back onto the detached TT cores
     output_ref = emb(indices.long(), offsets.long())
     output_ref.backward(d_output)
     d_weight_ref = emb.weight.grad.to_dense()
     full_weight.backward(d_weight_ref)
     for i in range(tt_ndims):
         torch.testing.assert_allclose(tt_emb.tt_cores[i].grad,
                                       tt_cores[i].grad)
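
# Aside: a minimal sketch of how a TT-matrix expands into the full
# (num_embeddings x embedding_dim) weight, which is conceptually what
# tt_matrix_to_full / tt_emb.full_weight() provide in the tests here.  The
# helper name tt_cores_to_full and the (r_k, p_k, q_k, r_{k+1}) core layout are
# illustrative assumptions, not the library's internal layout (the real
# tt_matrix_to_full also takes a permutation argument).
import torch

def tt_cores_to_full(cores):
    # Running contraction over the TT ranks; after absorbing core k the
    # accumulator has shape (p_1*...*p_k, q_1*...*q_k, r_{k+1}), with r_0 = r_d = 1.
    res = torch.ones(1, 1, 1)
    for core in cores:
        _, p, q, r_out = core.shape
        a, b, _ = res.shape
        res = torch.einsum("abr,rpqs->apbqs", res, core)
        res = res.reshape(a * p, b * q, r_out)
    return res.squeeze(-1)  # (prod(p), prod(q))

# e.g. tt_cores_to_full([torch.rand(1, 7, 3, 13), torch.rand(13, 9, 4, 1)])
# returns a (7 * 9) x (3 * 4) = 63 x 12 matrix.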
Example #2
def test_backward_adagrad(self, batch_size, pooling_factor,
                          pooling_factor_std, tt_ndims):
     device = torch.device("cuda:0")
     torch.cuda.set_device(device)
     tt_p_shapes = [7, 9, 11, 5]
     tt_q_shapes = [3, 4, 5, 7]
     tt_ranks = [13, 12, 7]
     tt_p_shapes = tt_p_shapes[:tt_ndims]
     tt_q_shapes = tt_q_shapes[:tt_ndims]
     tt_ranks = tt_ranks[:(tt_ndims - 1)]
     num_embeddings = np.prod(np.array(tt_p_shapes))
     embedding_dim = np.prod(np.array(tt_q_shapes))
     learning_rate = 0.1
     eps = 0.0001
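     # Adagrad hyperparameters; the manual reference update at the end of the
     # test uses the same learning_rate and eps passed to TTEmbeddingBag below.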
     _, indices, offsets, _ = generate_sparse_feature(
         batch_size,
         num_embeddings=num_embeddings,
         pooling_factor=float(pooling_factor),
         pooling_factor_std=float(pooling_factor_std),
         generate_scores=False,
         unary=False,
         unique=False,
     )
     # create TT-Embedding op
     offsets = torch.tensor(offsets, dtype=torch.int64, device=device)
     indices = torch.tensor(indices, dtype=torch.int64, device=device)
     tt_emb = TTEmbeddingBag(
         num_embeddings=num_embeddings,
         embedding_dim=embedding_dim,
         tt_p_shapes=tt_p_shapes,
         tt_q_shapes=tt_q_shapes,
         tt_ranks=tt_ranks,
         sparse=True,
         optimizer=OptimType.EXACT_ADAGRAD,
         learning_rate=learning_rate,
         eps=eps,
     )
     tt_emb.to(device)
     emb = torch.nn.EmbeddingBag(
         num_embeddings,
         embedding_dim,
         sparse=True,
         mode="sum",
         _weight=tt_emb.full_weight(),
         include_last_offset=True,
     )
     emb.to(device)
     d_output = torch.rand(batch_size, embedding_dim, device=device) * 0.1
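     # Snapshot the cores before the forward/backward pass: with sparse=True and
     # EXACT_ADAGRAD, tt_emb updates its cores in place during backward, and the
     # pre-update values are needed for the manual Adagrad check below.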
     tt_cores = [
         tt.clone().detach().requires_grad_(True) for tt in tt_emb.tt_cores
     ]
     full_weight = tt_matrix_to_full(tt_p_shapes, tt_q_shapes, tt_ranks,
                                     tt_cores, [1, 0, 2, 3])
     # tt_emb: forward/backward through the TT-embedding op (the fused Adagrad
     # update is applied during backward)
     output = tt_emb(indices, offsets)
     output.backward(d_output)
     # reference: dense EmbeddingBag forward/backward, then chain the weight
     # gradient back onto the detached TT cores
     output_ref = emb(indices.long(), offsets.long())
     output_ref.backward(d_output)
     d_weight_ref = emb.weight.grad.to_dense()
     full_weight.backward(d_weight_ref)
     # Reference exact-Adagrad update: the optimizer state starts at zero, so
     # after a single backward pass it equals grad**2, and each updated core is
     # core - learning_rate * grad / (sqrt(state) + eps).
     new_optimizer_state = [torch.mul(t.grad, t.grad) for t in tt_cores]
     new_tt_cores = [
         (t - torch.div(t.grad * learning_rate,
                        torch.sqrt(new_optimizer_state[i]) + eps))
         for i, t in enumerate(tt_cores)
     ]
     for i in range(tt_ndims):
         torch.testing.assert_allclose(tt_emb.optimizer_state[i],
                                       new_optimizer_state[i])
         torch.testing.assert_allclose(tt_emb.tt_cores[i], new_tt_cores[i])
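
# Aside: the plain exact-Adagrad step the test above reproduces by hand: a
# minimal sketch with illustrative names, not part of the TTEmbeddingBag API.
# The accumulator starts at zero, so after one step it equals grad ** 2,
# matching the new_optimizer_state check above.
import torch

def exact_adagrad_step(param, grad, state, learning_rate=0.1, eps=1e-4):
    # Accumulate squared gradients, then scale the step by 1 / (sqrt(state) + eps).
    new_state = state + grad * grad
    new_param = param - learning_rate * grad / (torch.sqrt(new_state) + eps)
    return new_param, new_state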