def test_convert_custom_sparse_to_dense_bare_keras_fn(self):
    """Verify the custom-sparse-to-dense converter on a single encoded row.

    The converter obtained from
    ``BareKerasUtil._convert_custom_sparse_to_dense_fn()`` is called with the
    encoded row ``[2, 1, 2, 0.1, 0.2]`` and a dense size of 4, and must
    return ``[0., 0.1, 0.2, 0.]``.
    """
    # NOTE(review): the encoding looks like [count, indices..., values...]
    # -- confirm against BareKerasUtil before relying on this reading.
    to_dense = BareKerasUtil._convert_custom_sparse_to_dense_fn()

    encoded_row = np.array([2, 1, 2, 0.1, 0.2])
    expected_dense = np.array([0., 0.1, 0.2, 0.])

    dense_row = to_dense(encoded_row, 4)

    assert np.array_equal(dense_row, expected_dense)
def test_prepare_data_bare_keras_fn(self):
    """Verify the data-preparation function for a plain and a sparse column.

    A function is built with ``BareKerasUtil._prepare_data_fn(metadata)`` and
    exercised twice:

    * ``col1`` (intermediate format ``'nochange'``) with target shape
      ``[-1, 3]`` must come back as a ``(1, 3)`` array with the same values.
    * ``col3`` (intermediate format ``'custom_sparse_format'``) with target
      shape ``[-1, 10]`` must come back as a ``(2, 10)`` dense array.
    """

    def _column_meta(dtype, intermediate_format, max_size, shape):
        # Build one metadata entry with the four keys used by this test.
        return {
            'dtype': dtype,
            'intermediate_format': intermediate_format,
            'max_size': max_size,
            'shape': shape,
        }

    metadata = {
        'col1': _column_meta(float, 'nochange', 1, 1),
        # col2 deliberately keeps its dtype as the string 'float'; it is
        # part of the metadata but is never passed to the prepare function.
        'col2': _column_meta('float', 'nochange', 1, 1),
        'col3': _column_meta(SparseVector, 'custom_sparse_format', 7, 10),
    }
    prepare = BareKerasUtil._prepare_data_fn(metadata)

    # --- plain numeric column -------------------------------------------
    plain_values = np.array([1., 2., 3.])
    plain_prepared = prepare(plain_values, 'col1', [-1, 3])
    assert plain_prepared.shape == (1, 3)
    assert np.array_equal(plain_prepared, np.array([[1., 2., 3.]]))

    # --- custom sparse column -------------------------------------------
    encoded_rows = [
        np.array([3., 0., 2., 5., 0., 0.2, 0.5, 0, 0]),
        np.array([4., 0., 2., 5., 6., 0.2, 0.5, 0.6, 0]),
    ]
    expected_dense = np.array([
        [0., 0., 0.2, 0., 0., 0.5, 0., 0., 0., 0.],
        [0.2, 0., 0.5, 0., 0., 0.6, 0., 0., 0., 0.],
    ])
    sparse_prepared = prepare(encoded_rows, 'col3', [-1, 10])
    assert sparse_prepared.shape == (2, 10)
    assert np.array_equal(sparse_prepared, expected_dense)
def test_batch_generator_fn(self):
    """Check the shapes of batches produced by the batch generator.

    A generator factory is obtained from
    ``BareKerasUtil._batch_generator_fn`` and driven with a dummy reader,
    once with ``shuffle=True`` and once with ``shuffle=False``. For each
    setting, ten batches are pulled and the shapes of the first feature,
    second feature, label and sample-weight entries are asserted.

    In this variant ``batch_size`` is passed to the generator call, not to
    the factory.

    NOTE(review): another definition named ``test_batch_generator_fn``
    appears later in the visible source and passes ``batch_size`` to the
    factory instead. If both live in the same class, the later definition
    replaces this one -- confirm which signature is current.
    """
    shuffle_buffer_size = 10
    rows_in_row_group = 100
    batch_size = 32

    def _create_numpy_array(n_rows, shape):
        # Row j holds the consecutive integers j, j + 1, ..., j + shape - 1.
        return np.array([list(range(j, j + shape)) for j in range(n_rows)])

    # The row type never changes, so it is created once rather than on every
    # iteration of the reader.
    Row = collections.namedtuple(
        'row', ['col1', 'col2', 'sample_weight', 'label'])

    def _create_row_group():
        # One row group: every field holds `rows_in_row_group` entries.
        return Row(
            col1=_create_numpy_array(rows_in_row_group, 1),
            col2=_create_numpy_array(rows_in_row_group, 10),
            label=_create_numpy_array(rows_in_row_group, 8),
            sample_weight=np.array(
                [i / 100. for i in range(rows_in_row_group)]))

    class DummyReader():
        """Dummy reader that yields one epoch (two row groups) per iteration.

        Starting a new iteration while a previous one is still in progress
        raises ``RuntimeError``.
        """

        def __init__(self):
            self._in_iter = False

        def __iter__(self):
            if self._in_iter:
                raise RuntimeError('Do not support resetting a dummy reader while in the middle of iteration.')
            self._in_iter = True

            row1 = _create_row_group()
            row2 = _create_row_group()

            try:
                yield row1
                yield row2
            finally:
                # Runs on exhaustion and on early close, so the reader can
                # be iterated again afterwards.
                self._in_iter = False

    metadata = {
        'col1': {
            'dtype': float,
            'intermediate_format': constants.NOCHANGE,
            'max_size': 1,
            'shape': 1
        },
        'col2': {
            'dtype': DenseVector,
            'intermediate_format': constants.ARRAY,
            'max_size': 10,
            'shape': 10
        },
        'label': {
            'dtype': float,
            'intermediate_format': constants.NOCHANGE,
            'max_size': 1,
            'shape': 1
        },
    }

    reader = DummyReader()

    feature_columns = ['col1', 'col2']
    label_columns = ['label']
    sample_weight_col = 'sample_weight'

    input_shapes = [[-1, 1], [-1, 2, 5]]
    output_shapes = [[-1, 2, 4]]

    batch_generator = BareKerasUtil._batch_generator_fn(
        feature_columns, label_columns, sample_weight_col,
        input_shapes, output_shapes, metadata)

    for shuffle in [True, False]:
        batch_gen = batch_generator(reader, batch_size, shuffle_buffer_size,
                                    shuffle=shuffle)

        for _ in range(10):
            batch = next(batch_gen)
            assert batch[0][0][0].shape == (1,)
            assert batch[0][1][0].shape == (2, 5)
            assert batch[1][0][0].shape == (2, 4)
            # sample weight has to be a single np array with shape
            # (batch_size,)
            assert batch[2][0].shape == (batch_size,)
def test_batch_generator_fn(self):
    """Check the shapes of batches produced by the batch generator.

    A generator factory is obtained from
    ``BareKerasUtil._batch_generator_fn`` (here ``batch_size`` is an
    argument of the factory) and driven with an endless dummy reader, once
    with ``shuffle=True`` and once with ``shuffle=False``. Ten batches are
    pulled per setting and the shapes of the first feature, second feature,
    label and sample-weight entries are asserted.

    NOTE(review): an earlier definition with this same name appears in the
    visible source and passes ``batch_size`` to the generator call instead.
    If both live in the same class, this later definition is the one that
    takes effect -- confirm the earlier one is meant to be superseded.
    """
    shuffle_buffer_size = 10
    rows_in_row_group = 100
    batch_size = 32

    def _create_numpy_array(n_rows, shape):
        # Row `start` holds the integers start, start + 1, ...,
        # start + shape - 1.
        return np.array(
            [list(range(start, start + shape)) for start in range(n_rows)])

    def dummy_reader():
        # Endless reader: alternates forever between two equally built row
        # groups.
        Row = collections.namedtuple(
            'row', ['col1', 'col2', 'sample_weight', 'label'])

        def _build_row_group():
            single = _create_numpy_array(rows_in_row_group, 1)
            vector = _create_numpy_array(rows_in_row_group, 10)
            target = _create_numpy_array(rows_in_row_group, 8)
            weights = np.array(
                [i / 100. for i in range(rows_in_row_group)])
            return Row(col1=single, col2=vector, label=target,
                       sample_weight=weights)

        first_group = _build_row_group()
        second_group = _build_row_group()

        while True:
            for row_group in (first_group, second_group):
                yield row_group

    def _column_meta(dtype, intermediate_format, size):
        # In this test max_size and shape are always equal for a column.
        return {
            'dtype': dtype,
            'intermediate_format': intermediate_format,
            'max_size': size,
            'shape': size,
        }

    metadata = {
        'col1': _column_meta(float, constants.NOCHANGE, 1),
        'col2': _column_meta(DenseVector, constants.ARRAY, 10),
        'label': _column_meta(float, constants.NOCHANGE, 1),
    }

    reader = dummy_reader()

    feature_columns = ['col1', 'col2']
    label_columns = ['label']
    sample_weight_col = 'sample_weight'

    input_shapes = [[-1, 1], [-1, 2, 5]]
    output_shapes = [[-1, 2, 4]]

    batch_generator = BareKerasUtil._batch_generator_fn(
        feature_columns, label_columns, sample_weight_col,
        input_shapes, output_shapes, batch_size, metadata)

    for shuffle in (True, False):
        batch_gen = batch_generator(
            reader, shuffle_buffer_size, shuffle=shuffle)

        for _ in range(10):
            batch = next(batch_gen)
            assert batch[0][0][0].shape == (1, )
            assert batch[0][1][0].shape == (2, 5)
            assert batch[1][0][0].shape == (2, 4)
            # sample weight has to be a single np array with shape
            # (batch_size,)
            assert batch[2][0].shape == (batch_size, )