def partial_crossing(self, partial_inputs, ragged_out, sparse_out):
  """Gets the crossed output from a partial list/tuple of inputs."""
  # If ragged_out=True, convert output from sparse to ragged.
  if ragged_out:
    # TODO(momernick): Support separator with ragged_cross.
    if self.separator != '_X_':
      raise ValueError('Non-default separator with ragged input is not '
                       'supported yet, given {}'.format(self.separator))
    return ragged_array_ops.cross(partial_inputs)
  elif sparse_out:
    return sparse_ops.sparse_cross(partial_inputs, separator=self.separator)
  else:
    return sparse_ops.sparse_tensor_to_dense(
        sparse_ops.sparse_cross(partial_inputs, separator=self.separator))
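A minimal usage sketch (not part of the original code) of the sparse branch above, assuming the public tf.sparse.cross and tf.sparse.to_dense APIs; the input strings and the '_Y_' separator are made-up illustration values.

import tensorflow as tf

# Hypothetical dense string inputs; each row is one example in the batch.
a = tf.constant([['a1', 'a2']])
b = tf.constant([['b1']])

# Cross with a non-default separator, as the sparse branch above does.
crossed = tf.sparse.cross([a, b], separator='_Y_')
dense = tf.sparse.to_dense(crossed)
# Expected: [[b'a1_Y_b1', b'a2_Y_b1']]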
def testRaggedCross(self,
                    inputs,
                    num_buckets=0,
                    hash_key=None,
                    expected=None,
                    expected_hashed=None,
                    matches_sparse_cross=True):
  ragged_cross = ragged_array_ops.cross(inputs)
  ragged_cross_hashed = ragged_array_ops.cross_hashed(inputs, num_buckets,
                                                      hash_key)

  if expected is not None:
    self.assertAllEqual(ragged_cross, expected)
  if expected_hashed is not None:
    self.assertAllEqual(ragged_cross_hashed, expected_hashed)

  if matches_sparse_cross:
    # Check that ragged.cross & sparse.cross match.
    sparse_inputs = [self._ragged_to_sparse(t) for t in inputs]
    sparse_cross = sparse_ops.sparse_cross(sparse_inputs)
    self.assertAllEqual(ragged_cross,
                        ragged_tensor.RaggedTensor.from_sparse(sparse_cross))

    # Check that ragged.cross_hashed & sparse.cross_hashed match.
    sparse_inputs = [self._ragged_to_sparse(t) for t in inputs]
    sparse_cross_hashed = sparse_ops.sparse_cross_hashed(
        sparse_inputs, num_buckets, hash_key)
    self.assertAllEqual(
        ragged_cross_hashed,
        ragged_tensor.RaggedTensor.from_sparse(sparse_cross_hashed))
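A minimal sketch (not from the original test file) of the equivalence this test checks, using the public tf.ragged.cross and tf.sparse.cross APIs; the example values are invented.

import tensorflow as tf

rt1 = tf.ragged.constant([['a1', 'a2'], ['a3']])
rt2 = tf.ragged.constant([['b1'], ['b2', 'b3']])

ragged_cross = tf.ragged.cross([rt1, rt2])
sparse_cross = tf.sparse.cross([rt1.to_sparse(), rt2.to_sparse()])

# Both should contain [['a1_X_b1', 'a2_X_b1'], ['a3_X_b2', 'a3_X_b3']].
assert ragged_cross.to_list() == tf.RaggedTensor.from_sparse(
    sparse_cross).to_list()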
def test_large_batch(self):
  """Tests with large batch size to force multithreading."""
  batch_size = 5000
  col1 = []
  col2 = []
  col3 = []
  for b in range(batch_size):
    col1.append(
        ['batch%d-FC1-F1' % b, 'batch%d-FC1-F2' % b, 'batch%d-FC1-F3' % b])
    col2.append(['batch%d-FC2-F1' % b])
    col3.append(['batch%d-FC3-F1' % b, 'batch%d-FC3-F2' % b])

  op = sparse_ops.sparse_cross([
      self._sparse_tensor(col1),
      self._sparse_tensor(col2),
      self._sparse_tensor(col3)
  ])

  col_out = []
  for b in range(batch_size):
    col_out.append([
        'batch%d-FC1-F1_X_batch%d-FC2-F1_X_batch%d-FC3-F1' % (b, b, b),
        'batch%d-FC1-F1_X_batch%d-FC2-F1_X_batch%d-FC3-F2' % (b, b, b),
        'batch%d-FC1-F2_X_batch%d-FC2-F1_X_batch%d-FC3-F1' % (b, b, b),
        'batch%d-FC1-F2_X_batch%d-FC2-F1_X_batch%d-FC3-F2' % (b, b, b),
        'batch%d-FC1-F3_X_batch%d-FC2-F1_X_batch%d-FC3-F1' % (b, b, b),
        'batch%d-FC1-F3_X_batch%d-FC2-F1_X_batch%d-FC3-F2' % (b, b, b)
    ])

  expected_out = self._sparse_tensor(col_out)
  with self.cached_session() as sess:
    self._assert_sparse_tensor_equals(expected_out, sess.run(op))
def test_dense(self):
  """Tests only dense inputs."""
  op = sparse_ops.sparse_cross([
      constant_op.constant([['batch1-FC1-F1', 'batch1-FC1-F2'],
                            ['batch2-FC1-F1', 'batch2-FC1-F2']],
                           dtypes.string),
      constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'],
                            ['batch2-FC2-F1', 'batch2-FC2-F2']],
                           dtypes.string),
  ])
  expected_out = self._sparse_tensor([[
      'batch1-FC1-F1_X_batch1-FC2-F1', 'batch1-FC1-F1_X_batch1-FC2-F2',
      'batch1-FC1-F2_X_batch1-FC2-F1', 'batch1-FC1-F2_X_batch1-FC2-F2'
  ], [
      'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
      'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
  ]])
  with self.cached_session() as sess:
    self._assert_sparse_tensor_equals(expected_out, sess.run(op))
def testNonScalarInput(self):
  with self.assertRaisesRegex(errors.InvalidArgumentError,
                              'Input separator should be a scalar.'):
    self.evaluate(
        sparse_ops.sparse_cross(
            inputs=[],
            name='a',
            separator=constant_op.constant(['a', 'b'], dtype=dtypes.string)))
def test_all_columns_empty(self):
  """Tests when all columns are empty.

  The crossed tensor should be empty.
  """
  op = sparse_ops.sparse_cross([
      self._sparse_tensor([]),
      self._sparse_tensor([]),
      self._sparse_tensor([])
  ])
  with self.cached_session() as sess:
    self._assert_sparse_tensor_empty(sess.run(op))
def test_one_column_empty(self):
  """Tests when one column is empty.

  The crossed tensor should be empty.
  """
  op = sparse_ops.sparse_cross([
      self._sparse_tensor([['batch1-FC1-F1', 'batch1-FC1-F2']]),
      self._sparse_tensor([], 1),
      self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']])
  ])
  with self.cached_session() as sess:
    self._assert_sparse_tensor_empty(sess.run(op))
def call(self, inputs):
  # Emit a SparseTensor only if at least one input is sparse; otherwise the
  # crossed output is converted back to a dense tensor.
  sparse_output = False
  if any(isinstance(inp, sparse_tensor.SparseTensor) for inp in inputs):
    sparse_output = True
  if self.num_bins is not None:
    output = sparse_ops.sparse_cross_hashed(inputs, num_buckets=self.num_bins)
  else:
    output = sparse_ops.sparse_cross(inputs)
  if not sparse_output:
    output = sparse_ops.sparse_tensor_to_dense(output)
  return output
def test_invalid_sparse_tensors(self):
  # Test validation of invalid SparseTensors. The SparseTensor constructor
  # prevents us from creating invalid SparseTensors (esp. in eager mode), so
  # we create valid SparseTensors and then modify them to be invalid.
  st1 = sparse_tensor.SparseTensor([[0, 0]], [0], [2, 2])
  st1._indices = array_ops.zeros([], dtypes.int64)
  with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError),
                              'Input indices should be a matrix'):
    self.evaluate(sparse_ops.sparse_cross([st1]))

  st2 = sparse_tensor.SparseTensor([[0, 0]], [0], [2, 2])
  st2._values = array_ops.zeros([], dtypes.int64)
  with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError),
                              'Input values should be a vector'):
    self.evaluate(sparse_ops.sparse_cross([st2]))

  st3 = sparse_tensor.SparseTensor([[0, 0]], [0], [2, 2])
  st3._dense_shape = array_ops.zeros([], dtypes.int64)
  with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError),
                              'Input shapes should be a vector'):
    self.evaluate(sparse_ops.sparse_cross([st3]))
def test_integer_mixed_string_sparse(self):
  """Tests mixed type."""
  op = sparse_ops.sparse_cross([
      self._sparse_tensor([[11], [333, 55555]]),
      self._sparse_tensor([['batch1-FC2-F1'],
                           ['batch2-FC2-F1', 'batch2-FC2-F2']])
  ])
  expected_out = self._sparse_tensor([['11_X_batch1-FC2-F1'], [
      '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2', '55555_X_batch2-FC2-F1',
      '55555_X_batch2-FC2-F2'
  ]])
  with self.cached_session() as sess:
    self._assert_sparse_tensor_equals(expected_out, sess.run(op))
def test_simple(self):
  """Tests a simple scenario."""
  op = sparse_ops.sparse_cross([
      self._sparse_tensor([['batch1-FC1-F1'],
                           ['batch2-FC1-F1', 'batch2-FC1-F2']]),
      self._sparse_tensor([['batch1-FC2-F1'],
                           ['batch2-FC2-F1', 'batch2-FC2-F2']])
  ])
  expected_out = self._sparse_tensor([['batch1-FC1-F1_X_batch1-FC2-F1'], [
      'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
      'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
  ]])
  with self.cached_session() as sess:
    self._assert_sparse_tensor_equals(expected_out, sess.run(op))
def partial_crossing(self, partial_inputs, ragged_out, sparse_out):
  """Gets the crossed output from a partial list/tuple of inputs."""
  if self.num_bins is not None:
    partial_output = sparse_ops.sparse_cross_hashed(
        partial_inputs, num_buckets=self.num_bins)
  else:
    partial_output = sparse_ops.sparse_cross(partial_inputs)

  # If ragged_out=True, convert output from sparse to ragged.
  if ragged_out:
    return ragged_tensor.RaggedTensor.from_sparse(partial_output)
  elif sparse_out:
    return partial_output
  else:
    return sparse_ops.sparse_tensor_to_dense(partial_output)
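A minimal sketch (not from the original layer code) of the hashed branch above, using the public tf.sparse.cross_hashed API; the inputs and num_buckets=10 are arbitrary, and the exact bucket ids depend on the fingerprint hash.

import tensorflow as tf

a = tf.constant([['a1', 'a2']])
b = tf.constant([['b1']])

# Hashed cross into 10 buckets, mirroring the num_bins branch above.
hashed = tf.sparse.cross_hashed([a, b], num_buckets=10)
dense = tf.sparse.to_dense(hashed)
# dense has shape [1, 2] with int64 bucket ids in [0, 10).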
def test_integer_sparse_input(self):
  """Tests mixed type sparse and dense inputs."""
  op = sparse_ops.sparse_cross([
      self._sparse_tensor([[11], [333, 5555]]),
      constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'],
                            ['batch2-FC2-F1', 'batch2-FC2-F2']],
                           dtypes.string),
  ])
  expected_out = self._sparse_tensor(
      [['11_X_batch1-FC2-F1', '11_X_batch1-FC2-F2'], [
          '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2', '5555_X_batch2-FC2-F1',
          '5555_X_batch2-FC2-F2'
      ]])
  with self.cached_session() as sess:
    self._assert_sparse_tensor_equals(expected_out, sess.run(op))
def test_sparse_cross_dense(self):
  """Tests sparse and dense inputs."""
  op = sparse_ops.sparse_cross([
      self._sparse_tensor([['batch1-FC1-F1'],
                           ['batch2-FC1-F1', 'batch2-FC1-F2']]),
      constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'],
                            ['batch2-FC2-F1', 'batch2-FC2-F2']],
                           dtypes.string),
  ])
  expected_out = self._sparse_tensor(
      [['batch1-FC1-F1_X_batch1-FC2-F1', 'batch1-FC1-F1_X_batch1-FC2-F2'], [
          'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
          'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
      ]])
  with self.cached_session() as sess:
    self._assert_sparse_tensor_equals(expected_out, sess.run(op))
def test_integer_mixed_string_dense(self):
  """Tests mixed dense inputs."""
  op = sparse_ops.sparse_cross([
      constant_op.constant([[11, 333], [55555, 999999]], dtypes.int64),
      constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'],
                            ['batch2-FC2-F1', 'batch2-FC2-F2']],
                           dtypes.string),
  ])
  expected_out = self._sparse_tensor([[
      '11_X_batch1-FC2-F1', '11_X_batch1-FC2-F2', '333_X_batch1-FC2-F1',
      '333_X_batch1-FC2-F2'
  ], [
      '55555_X_batch2-FC2-F1', '55555_X_batch2-FC2-F2',
      '999999_X_batch2-FC2-F1', '999999_X_batch2-FC2-F2'
  ]])
  with self.cached_session() as sess:
    self._assert_sparse_tensor_equals(expected_out, sess.run(op))
def test_permutation_3x1x2(self):
  """Tests 3x1x2 permutation."""
  op = sparse_ops.sparse_cross([
      self._sparse_tensor(
          [['batch1-FC1-F1', 'batch1-FC1-F2', 'batch1-FC1-F3']]),
      self._sparse_tensor([['batch1-FC2-F1']]),
      self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']])
  ])
  expected_out = self._sparse_tensor([[
      'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F1',
      'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F2',
      'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1',
      'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2',
      'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F1',
      'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F2'
  ]])
  with self.cached_session() as sess:
    self._assert_sparse_tensor_equals(expected_out, sess.run(op))
def test_some_columns_empty(self):
  """Tests when more than one column is empty.

  The cross for the corresponding batch should be empty.
  """
  op = sparse_ops.sparse_cross([
      self._sparse_tensor([['batch1-FC1-F1', 'batch1-FC1-F2']], 2),
      self._sparse_tensor([['batch1-FC2-F1'], ['batch2-FC2-F1']], 2),
      self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']], 2)
  ])
  expected_out = self._sparse_tensor([[
      'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F1',
      'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F2',
      'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1',
      'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2'
  ]], 2)
  with self.cached_session() as sess:
    self._assert_sparse_tensor_equals(expected_out, sess.run(op))
def run_dataset_implementation(self, batch_size):
  num_repeats = 5
  starts = []
  ends = []
  for _ in range(num_repeats):
    ds = dataset_ops.Dataset.from_generator(
        int_gen, (dtypes.int64, dtypes.int64),
        (tensor_shape.TensorShape([1]), tensor_shape.TensorShape([1])))
    ds = ds.shuffle(batch_size * 100)
    ds = ds.batch(batch_size)
    num_batches = 5
    ds = ds.take(num_batches)
    ds = ds.prefetch(num_batches)
    starts.append(time.time())
    # Benchmarked code begins here.
    for i in ds:
      _ = sparse_ops.sparse_cross([i[0], i[1]])
    # Benchmarked code ends here.
    ends.append(time.time())

  avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches
  return avg_time