def test_to_sparse():
    c = [[1, 0], [2, 3]]

    sparse_tensor = tx.to_sparse(c)

    dense_shape = tf.shape(c, out_type=tf.int64)
    indices = tf.where(tf.not_equal(c, 0))

    flat_values = tf.reshape(c, [-1])
    flat_indices = tf.where(tf.not_equal(flat_values, 0))
    flat_indices = tf.squeeze(flat_indices)
    flat_indices = tf.math.mod(flat_indices, dense_shape[1])

    values = tf.gather_nd(c, indices)

    sp_indices = tx.sparse_indices(sparse_tensor)

    assert tx.tensor_equal(sparse_tensor.indices, indices)
    assert tx.tensor_equal(sp_indices.values, flat_indices)
    assert tx.tensor_equal(sparse_tensor.values, values)
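
# For reference, evaluating the expressions in test_to_sparse above on
# c = [[1, 0], [2, 3]] gives the expected sparse contents:
#   sparse_tensor.indices -> [[0, 0], [1, 0], [1, 1]]
#   sparse_tensor.values  -> [1, 2, 3]
#   sp_indices.values     -> [0, 0, 1]   (column index of each non-zero entry)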
labels = [[0, 1], [2, 3]]
flat_labels = tf.reshape(labels, [-1])

vocab_size = 1000
k = 100
s = 2
embed_size = 4

# generate one random index per vocabulary entry (k dimensions, s active entries)
generator = Generator(k, s)
ris = [generator.generate() for _ in range(vocab_size)]
ri_tensor = RandomIndexTensor.from_ri_list(ris, k, s)

# gather the random indexes for the flattened label ids
sp_values = ri_tensor.gather(flat_labels).to_sparse_tensor()
sp_indices = tx.sparse_indices(sp_values)

print(sp_values.get_shape())
print(tensor_util.constant_value_as_shape(sp_values.dense_shape))
print(tensor_util.constant_value(sp_values.dense_shape))
print(sp_values.dense_shape[-1].eval())
print(tf.shape(sp_values).eval())

lookup = tx.Lookup(tx.TensorLayer(sp_values), seq_size=1, lookup_shape=[k, embed_size])
linear = tx.Linear(tx.TensorLayer(sp_values), n_units=k, shared_weights=lookup.weights)
print("adaptive sample: ", tf.shape(noise_logits.tensor).eval()) print("[noise sample shape] {}".format(tf.shape(sampled).eval())) labels_flat = array_ops.reshape(input_labels.tensor, [-1]) true_ris = tx.gather_sparse(sp_tensor=ri_tensor, ids=labels_flat) noise_ris = tx.gather_sparse(sp_tensor=ri_tensor, ids=sampled) print("----") print("[true_ri shape] {}".format(tf.shape(true_ris).eval())) print("[noise_ri shape] {}".format(tf.shape(noise_ris).eval())) print("----") true_w = embedding_lookup_sparse(params=lookup.weights, sp_ids=tx.sparse_indices(true_ris), sp_weights=true_ris, combiner="sum", partition_strategy="mod") noise_w = embedding_lookup_sparse(params=lookup.weights, sp_ids=tx.sparse_indices(noise_ris), sp_weights=noise_ris, combiner="sum", partition_strategy="mod") print("[true_w shape] {}".format(tf.shape(true_w).eval())) print("[noise_w shape] {}".format(tf.shape(noise_w).eval())) print("----") # ************************************* # LOGITS
# dense reference: A @ B^T
r1 = tf.matmul(A, B, transpose_b=True)

# sparse x dense matmul (the sparse operand must come first), transposed back afterwards
rs = tf.sparse_tensor_dense_matmul(C, A, adjoint_b=True)
rs = tf.transpose(rs)

D = tf.sparse_tensor_to_dense(C)
# dense matmul with a sparsity hint on the second operand
r2 = tf.sparse_matmul(A, B, transpose_b=True, b_is_sparse=True)

sess = tf.Session()

Cd = tf.sparse_tensor_to_dense(C)
r3 = tf.matmul(A, Cd, b_is_sparse=True, transpose_b=True)

# Ct = tf.sparse_transpose(C)
Ct = C
Ci = tx.sparse_indices(Ct)
# embedding_lookup_sparse combines rows of A^T selected and weighted by C
r4 = tf.nn.embedding_lookup_sparse(tf.transpose(A), sp_ids=Ci, sp_weights=Ct, combiner="sum")

print(sess.run(r1))
print("=" * 40)
print(sess.run(rs))
print("=" * 40)
print(sess.run(r2))
print("=" * 40)
print(sess.run(r3))
print("=" * 40)
print(sess.run(r4))
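
# ---------------------------------------------------------------------------
# Note: A, B and C are defined earlier in this script. A minimal setup under
# which the comparisons above run (hypothetical names/shapes, assuming B is
# the dense form of the SparseTensor C) could be:
#
#   A = tf.constant([[1., 2.], [3., 4.]])
#   B = tf.constant([[1., 0.], [2., 1.]])
#   C = tf.SparseTensor(indices=[[0, 0], [1, 0], [1, 1]],
#                       values=[1., 2., 1.],
#                       dense_shape=[2, 2])
#
# Under that assumption r1, rs, r2 and r3 all evaluate to A @ B^T, while r4
# evaluates to C @ A^T, i.e. the transpose of the same product.
# ---------------------------------------------------------------------------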
def _compute_random_ri_sampled_logits(ri_tensors,
                                      k_dim,
                                      s_active,
                                      weights,
                                      labels,
                                      inputs,
                                      num_sampled,
                                      num_true=1,
                                      subtract_log_q=True,
                                      partition_strategy="mod",
                                      name=None,
                                      seed=None):
    """ Random Random-Index Sampled Logits with negative sampling
    https://arxiv.org/pdf/1410.8251.pdf

    Computes the sampled logits from the space of all possible random indexes.
    Since any random index is possible, we sample not from the existing random
    indexes but from the space of possible random indexes, so that the model
    learns which combinations of bases are NOT the ones used to predict a given
    feature.

    Args:
        ri_tensors: SparseTensor with one random index row per class.
        k_dim: dimensionality of each random index.
        s_active: number of active (non-zero) entries per random index.
        weights: embedding weights of shape [num_classes, dim].
        labels: target classes, shape [batch_size, num_true].
        inputs: forward activations, shape [batch_size, dim].
        num_sampled: number of random indexes to sample as negative examples.
        num_true: number of target classes per example.
        subtract_log_q: whether to subtract the log expected count of each pair.
        partition_strategy: partition strategy for the embedding lookup.
        name: name scope for the created ops.
        seed: random seed for sampling.

    Returns:
        out_logits, out_labels: tensors of shape [batch_size, num_true + num_sampled].
    """
    if isinstance(weights, variables.PartitionedVariable):
        weights = list(weights)
    if not isinstance(weights, list):
        weights = [weights]

    with ops.name_scope(name, "random_ri_sampled_logits", weights + [inputs, labels]):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        true_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=labels_flat)

        sampled_ris, expected_true_ris, expected_sampled_ris = sample_ri(k_dim, s_active,
                                                                         num_sampled, true_ris)
        all_ris = sparse_ops.sparse_concat(axis=0, sp_inputs=[true_ris, sampled_ris])

        sp_values = all_ris
        sp_indices = tx.sparse_indices(sp_values)

        # Retrieve the weights
        # weights shape is [num_classes, dim]
        all_w = embedding_lookup_sparse(
            weights, sp_indices, sp_values, combiner="sum",
            partition_strategy=partition_strategy)

        # true_w shape is [batch_size * num_true, dim]
        true_w = array_ops.slice(all_w, [0, 0],
                                 array_ops.stack([array_ops.shape(labels_flat)[0], -1]))

        sampled_w = array_ops.slice(
            all_w, array_ops.stack([array_ops.shape(labels_flat)[0], 0]), [-1, -1])

        # inputs has shape [batch_size, dim]
        # sampled_w has shape [num_sampled, dim]
        # Apply X*W', which yields [batch_size, num_sampled]
        sampled_logits = math_ops.matmul(inputs, sampled_w, transpose_b=True)

        dim = array_ops.shape(true_w)[1:2]
        new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
        row_wise_dots = math_ops.multiply(
            array_ops.expand_dims(inputs, 1),
            array_ops.reshape(true_w, new_true_w_shape))

        # We want the row-wise dot plus biases which yields a
        # [batch_size, num_true] tensor of true_logits.
        dots_as_matrix = array_ops.reshape(row_wise_dots,
                                           array_ops.concat([[-1], dim], 0))
        true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])

        if subtract_log_q:
            # Subtract log of Q(l), prior probability that label appears in sampled.
            true_logits -= math_ops.log(expected_true_ris)
            sampled_logits -= math_ops.log(expected_sampled_ris)

        # Construct output logits and labels. The true labels/logits start at col 0.
        out_logits = array_ops.concat([true_logits, sampled_logits], 1)

        # true_logits is a float tensor, ones_like(true_logits) is a float
        # tensor of ones. We then divide by num_true to ensure the per-example
        # labels sum to 1.0, i.e. form a proper probability distribution.
        out_labels = array_ops.concat([
            array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(sampled_logits)
        ], 1)

    return out_logits, out_labels
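
# A minimal usage sketch (hypothetical names and shapes, for illustration only):
# the (logits, labels) pair returned above is meant to be fed into a sigmoid
# cross-entropy, as done for NCE / negative sampling style losses.
#
#   logits, labels = _compute_random_ri_sampled_logits(
#       ri_tensors=ri_tensor, k_dim=k, s_active=s,
#       weights=lookup.weights, labels=input_labels.tensor,
#       inputs=input_layer.tensor, num_sampled=10)
#   sampled_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
#   loss = tf.reduce_mean(tf.reduce_sum(sampled_loss, axis=-1))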
def _compute_sampled_logits(ri_tensors,
                            weights,
                            bias,
                            labels,
                            partition_const,
                            inputs,
                            num_sampled,
                            num_classes,
                            num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None,
                            seed=None):
    if isinstance(weights, variables.PartitionedVariable):
        weights = list(weights)
    if not isinstance(weights, list):
        weights = [weights]

    with ops.name_scope(name, "compute_sampled_logits", weights + [inputs, labels]):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        # Sample the negative labels.
        #   sampled shape: [num_sampled] tensor
        #   true_expected_count shape = [batch_size, 1] tensor
        #   sampled_expected_count shape = [num_sampled] tensor
        if sampled_values is None:
            sampled_values = candidate_sampling_ops.uniform_candidate_sampler(
                true_classes=labels,
                num_true=num_true,
                num_sampled=num_sampled,
                unique=True,
                range_max=num_classes,
                seed=seed)
        # NOTE: pylint cannot tell that 'sampled_values' is a sequence
        # pylint: disable=unpacking-non-sequence
        sampled, true_expected_count, sampled_expected_count = (
            array_ops.stop_gradient(s) for s in sampled_values)
        # pylint: enable=unpacking-non-sequence
        sampled = math_ops.cast(sampled, dtypes.int64)

        # labels_flat is a [batch_size * num_true] tensor
        # sampled is a [num_sampled] int tensor
        all_ids = array_ops.concat([labels_flat, sampled], 0)

        # true_ris
        true_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=labels_flat)
        sampled_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=sampled)

        true_w = embedding_lookup_sparse(params=weights,
                                         sp_ids=tx.sparse_indices(true_ris),
                                         sp_weights=true_ris,
                                         combiner="sum",
                                         partition_strategy=partition_strategy)

        noise_w = embedding_lookup_sparse(params=weights,
                                          sp_ids=tx.sparse_indices(sampled_ris),
                                          sp_weights=sampled_ris,
                                          combiner="sum",
                                          partition_strategy=partition_strategy)

        if bias is not None:
            sampled_b = embedding_lookup_sparse(params=bias,
                                                sp_ids=tx.sparse_indices(sampled_ris),
                                                sp_weights=sampled_ris,
                                                combiner="sum",
                                                partition_strategy=partition_strategy)

            true_b = embedding_lookup_sparse(params=bias,
                                             sp_ids=tx.sparse_indices(true_ris),
                                             sp_weights=true_ris,
                                             combiner="sum",
                                             partition_strategy=partition_strategy)

        noise_logits = math_ops.matmul(inputs, noise_w, transpose_b=True)

        dim = array_ops.shape(true_w)[1:2]
        new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
        true_w_e = array_ops.reshape(true_w, new_true_w_shape)
        row_wise_dots = math_ops.multiply(array_ops.expand_dims(inputs, 1), true_w_e)

        # We want the row-wise dot plus biases which yields a
        # [batch_size, num_true] tensor of true_logits.
        dots_as_matrix = array_ops.reshape(row_wise_dots,
                                           array_ops.concat([[-1], dim], 0))
        true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])

        if bias is not None:
            true_b = array_ops.reshape(true_b, [-1, num_true])
            true_logits += true_b
            noise_logits += sampled_b

        # TODO need to review how to do this Z
        # true_logits = true_logits * math_ops.exp(partition_const)

        if remove_accidental_hits:
            acc_hits = candidate_sampling_ops.compute_accidental_hits(
                labels, sampled, num_true=num_true)
            acc_indices, acc_ids, acc_weights = acc_hits

            # This is how SparseToDense expects the indices.
            acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
            acc_ids_2d_int32 = array_ops.reshape(
                math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
            sparse_indices = array_ops.concat([acc_indices_2d, acc_ids_2d_int32],
                                              1, "sparse_indices")
            # Create sampled_logits_shape = [batch_size, num_sampled]
            sampled_logits_shape = array_ops.concat(
                [array_ops.shape(labels)[:1],
                 array_ops.expand_dims(num_sampled, 0)], 0)
            if noise_logits.dtype != acc_weights.dtype:
                acc_weights = math_ops.cast(acc_weights, noise_logits.dtype)
            noise_logits += sparse_ops.sparse_to_dense(
                sparse_indices,
                sampled_logits_shape,
                acc_weights,
                default_value=0.0,
                validate_indices=False)

        if subtract_log_q:
            # Subtract log of Q(l), prior probability that l appears in sampled.
            true_logits -= math_ops.log(true_expected_count)
            noise_logits -= math_ops.log(sampled_expected_count)

        # Construct output logits and labels. The true labels/logits start at col 0.
        out_logits = array_ops.concat([true_logits, noise_logits], 1)

        # true_logits is a float tensor, ones_like(true_logits) is a float
        # tensor of ones. We then divide by num_true to ensure the per-example
        # labels sum to 1.0, i.e. form a proper probability distribution.
        out_labels = array_ops.concat([
            array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(noise_logits)
        ], 1)

        # out_logits = math_ops.div(out_logits, math_ops.exp(partition_const))
        # out_logits = out_logits / (partition_const + 1)

    return out_logits, out_labels
def _sampled_logits_from_parametric_noise(ri_tensors,
                                          k_dim,
                                          weights,
                                          labels,
                                          inputs,
                                          input_dim,
                                          num_true=1,
                                          partition_strategy="mod",
                                          name=None):
    if isinstance(weights, variables.PartitionedVariable):
        weights = list(weights)
    if not isinstance(weights, list):
        weights = [weights]

    with ops.name_scope(name, "compute_sampled_logits", weights + [inputs, labels]):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        # true_ris
        true_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=labels_flat)

        true_w = embedding_lookup_sparse(params=weights,
                                         sp_ids=tx.sparse_indices(true_ris),
                                         sp_weights=true_ris,
                                         combiner="sum",
                                         partition_strategy=partition_strategy)

        # parametric noise: generate noise random indexes from the true class embeddings
        label_layer = tx.TensorLayer(true_w, input_dim)
        noise_fn = tx.FC(label_layer, 512, activation=tx.relu)
        noise_fn_sp = tx.ToSparse(noise_fn)
        noise_ris = tx.Linear(noise_fn_sp, k_dim, weight_init=tx.glorot_uniform(), bias=True)
        noise_ris_sp = tx.ToSparse(noise_ris)

        noise_w = embedding_lookup_sparse(params=weights,
                                          sp_ids=tx.sparse_indices(noise_ris_sp.tensor),
                                          sp_weights=noise_ris_sp.tensor,
                                          combiner="sum",
                                          partition_strategy=partition_strategy)

        noise_logits = math_ops.matmul(inputs, noise_w, transpose_b=True)

        dim = array_ops.shape(true_w)[1:2]
        new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
        true_w_e = array_ops.reshape(true_w, new_true_w_shape)
        row_wise_dots = math_ops.multiply(array_ops.expand_dims(inputs, 1), true_w_e)

        # We want the row-wise dot plus biases which yields a
        # [batch_size, num_true] tensor of true_logits.
        dots_as_matrix = array_ops.reshape(row_wise_dots,
                                           array_ops.concat([[-1], dim], 0))
        true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])

        # Construct output logits and labels. The true labels/logits start at col 0.
        out_logits = array_ops.concat([true_logits, noise_logits], 1)

        # true_logits is a float tensor, ones_like(true_logits) is a float
        # tensor of ones. We then divide by num_true to ensure the per-example
        # labels sum to 1.0, i.e. form a proper probability distribution.
        out_labels = array_ops.concat([
            array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(noise_logits)
        ], 1)

        # out_logits = out_logits * math_ops.exp(partition_const)
        # out_logits = out_logits / (partition_const + 1)

    return out_logits, out_labels