def testSparse(self):
    """Check that select_rows on a sparse series keeps only masked rows."""
    coords = [[0, 0, 0], [0, 1, 1], [5, 1, 2], [3, 0, 2], [2, 4, 1],
              [3, 5, 1], [7, 3, 2]]
    vals = list(range(len(coords)))
    # Per-dimension maximum index plus one gives the dense shape.
    dense_shape = [max(dim) + 1 for dim in zip(*coords)]

    np.random.seed(1234)
    # One standard-normal draw per row; rows whose draw exceeds 0.5 are
    # selected.
    row_mask = np.random.randn(dense_shape[0]) > 0.5

    # Expected output: the (index, value) pairs whose row is selected.
    kept = [(coord, val)
            for coord, val in zip(coords, vals)
            if row_mask[coord[0]]]
    expected_indices, expected_values = zip(*kept)

    sparse_input = sparse_tensor.SparseTensor(coords, vals, dense_shape)
    sparse_series = mocks.MockSeries("sparse_series", sparse_input)
    mask_series = mocks.MockSeries("mask", constant_op.constant(row_mask))
    masked = sparse_series.select_rows(mask_series)

    with self.test_session() as sess:
        actual = sess.run(masked.build())

    np.testing.assert_array_equal(expected_indices, actual.indices)
    np.testing.assert_array_equal(expected_values, actual.values)
    np.testing.assert_array_equal(dense_shape, actual.dense_shape)
def test_make_list_of_column(self):
    """Check _make_list_of_series on None, a single series, lists, tuples."""
    foo = mocks.MockSeries("foo", [])
    bar = mocks.MockSeries("bar", [])

    # (expected list, argument passed to _make_list_of_series)
    cases = (
        ([], None),
        ([foo], foo),
        ([foo], [foo]),
        ([foo, bar], [foo, bar]),
        ([foo, bar], (foo, bar)),
    )
    for expected, argument in cases:
        self.assertEqual(expected, _make_list_of_series(argument))
def test_set_item_column_multi(self):
    """Assign two columns in one __setitem__ call and read each back."""
    df = setup_test_df()
    self.assertEqual(3, len(df))

    quack = mocks.MockSeries("QuackColumn", [])
    moo = mocks.MockSeries("MooColumn", [])
    df["quack", "moo"] = [quack, moo]

    # Both new columns are added on top of the three initial ones.
    self.assertEqual(5, len(df))
    self.assertEqual(quack, df["quack"])
    self.assertEqual(moo, df["moo"])
def setup_test_df():
    """Create a dataframe populated with some test columns.

    Columns "a", "b" and "c" are each a TransformedSeries built from a fresh
    MockSeries and a fresh MockTwoOutputTransform; they select outputs
    "out1", "out2" and "out1" respectively.
    """
    df = learn.DataFrame()
    for column, output_name in (("a", "out1"), ("b", "out2"), ("c", "out1")):
        source = mocks.MockSeries("foobar", [])
        transform = mocks.MockTwoOutputTransform("iue", "eui", "snt")
        df[column] = learn.TransformedSeries([source], transform, output_name)
    return df
def setUp(self):
    """Build two Example protos, their serialized series, and expectations.

    Sets self.example1 / self.example2, self.example_column (a MockSeries of
    the two serialized protos), self.features (the feature specs), and the
    expected string values, string indices and int feature arrays.
    """
    super(ExampleParserTestCase, self).setUp()

    example1_text = ("features: { "
                     " feature: { "
                     " key: 'int_feature' "
                     " value: { "
                     " int64_list: { "
                     " value: [ 21, 2, 5 ] "
                     " } "
                     " } "
                     " } "
                     " feature: { "
                     " key: 'string_feature' "
                     " value: { "
                     " bytes_list: { "
                     " value: [ 'armadillo' ] "
                     " } "
                     " } "
                     " } "
                     "} ")
    self.example1 = example_pb2.Example()
    text_format.Parse(example1_text, self.example1)

    example2_text = ("features: { "
                     " feature: { "
                     " key: 'int_feature' "
                     " value: { "
                     " int64_list: { "
                     " value: [ 4, 5, 6 ] "
                     " } "
                     " } "
                     " } "
                     " feature: { "
                     " key: 'string_feature' "
                     " value: { "
                     " bytes_list: { "
                     " value: [ 'car', 'train' ] "
                     " } "
                     " } "
                     " } "
                     "} ")
    self.example2 = example_pb2.Example()
    text_format.Parse(example2_text, self.example2)

    # Input series: one serialized Example per row.
    serialized_examples = [self.example1.SerializeToString(),
                           self.example2.SerializeToString()]
    self.example_column = mocks.MockSeries(
        "example",
        tf.constant(serialized_examples, dtype=tf.string, shape=[2]))

    string_spec = tf.VarLenFeature(dtype=tf.string)
    int_spec = tf.FixedLenFeature(
        shape=[3], dtype=tf.int64, default_value=[0, 0, 0])
    self.features = (("string_feature", string_spec),
                     ("int_feature", int_spec))

    # Expected values are read back from the protos themselves.
    features1 = self.example1.features.feature
    features2 = self.example2.features.feature

    self.expected_string_values = np.array(
        list(features1["string_feature"].bytes_list.value) +
        list(features2["string_feature"].bytes_list.value))
    self.expected_string_indices = np.array([[0, 0], [1, 0], [1, 1]])
    self.expected_int_feature = np.array([
        list(features1["int_feature"].int64_list.value),
        list(features2["int_feature"].int64_list.value),
    ])
def test_set_item_column(self):
    """Assign one new column and check it is stored and retrievable."""
    df = setup_test_df()
    self.assertEqual(3, len(df))

    quack = mocks.MockSeries("QuackColumn", [])
    df["quack"] = quack

    self.assertEqual(4, len(df))
    self.assertEqual(quack, df["quack"])
def test_build_multiple_output(self):
    """Build the "out2" output of a two-output transform and compare it."""
    source = mocks.MockSeries("foobar", [])
    transform = mocks.MockTwoOutputTransform("thb", "nth", "snt")
    series = learn.TransformedSeries([source], transform, "out2")

    built = series.build()

    self.assertEqual(mocks.MockTensor("Mock Tensor 2", dtypes.int32), built)
def test_build_single_output(self):
    """Build the "out1" output of a one-output transform and compare it."""
    source = mocks.MockSeries("foobar", [])
    transform = mocks.MockOneOutputTransform("thb", "nth")
    series = learn.TransformedSeries([source], transform, "out1")

    built = series.build()

    self.assertEqual("Fake Tensor 1", built)
def test_set_item_column(self):
    """Assign one tensor-backed column and check it is stored."""
    df = setup_test_df()
    self.assertEqual(3, len(df))

    tensor = mocks.MockTensor("Tensor ", dtypes.int32)
    quack = mocks.MockSeries("QuackColumn", tensor)
    df["quack"] = quack

    self.assertEqual(4, len(df))
    self.assertEqual(quack, df["quack"])
def testDense(self):
    """Check that select_rows on a dense series matches NumPy row masking."""
    num_rows, num_cols = 10, 5
    np.random.seed(1234)

    # Random dense input of shape (num_rows, num_cols).
    data = np.random.randn(num_rows, num_cols)
    # One standard-normal draw per row; rows whose draw exceeds 0.5 are
    # selected.
    row_mask = np.random.randn(num_rows) > 0.5
    expected_result = data[row_mask]

    dense_series = mocks.MockSeries("dense_series", tf.constant(data))
    mask_series = mocks.MockSeries("mask", tf.constant(row_mask))
    masked = dense_series.select_rows(mask_series)

    with self.test_session() as sess:
        actual_result = sess.run(masked.build())

    np.testing.assert_almost_equal(expected_result, actual_result)
def test_repr(self):
    """Check the repr of a TransformedSeries: transform, input, output."""
    source = mocks.MockSeries("foobar", [])
    transform = mocks.MockTwoOutputTransform("thb", "nth", "snt")
    series = learn.TransformedSeries([source], transform, "qux")

    # Parameters appear sorted by name in the expected repr.
    expected = ("MockTransform({'param_one': 'thb', 'param_three': 'snt', "
                "'param_two': 'nth'})"
                "(foobar)[qux]")

    self.assertEqual(expected, repr(series))
def test_call(self):
    """Call a two-output transform and check both results' exact type."""
    transform = mocks.MockTwoOutputTransform("a", "b", "c")
    # MockTwoOutputTransform has input valency 1, so one input series.
    source = mocks.MockSeries("foobar", [])

    first, second = transform([source])  # pylint: disable=not-callable

    self.assertEqual(learn.TransformedSeries, type(first))
    # NOTE(review): the two assertions below were disabled in the original
    # and are left disabled here.
    # self.assertEqual(out1.transform, t)
    # self.assertEqual(out1.output_name, "output1")
    self.assertEqual(learn.TransformedSeries, type(second))
def setup_test_df_3layer():
    """Create a dataframe populated with some test columns.

    Columns "a".."d" are MockSeries alternating dense and sparse mock
    tensors. "e" and "f" are TransformedSeries over ("a", "b") and
    ("c", "d"), and "g" is a TransformedSeries over ("e", "f").
    """
    df = learn.DataFrame()

    # (column, series name, mock tensor factory, tensor name)
    base_columns = (
        ("a", "a_series", mocks.MockTensor, "Tensor a"),
        ("b", "b_series", mocks.MockSparseTensor, "SparseTensor b"),
        ("c", "c_series", mocks.MockTensor, "Tensor c"),
        ("d", "d_series", mocks.MockSparseTensor, "SparseTensor d"),
    )
    for column, series_name, make_tensor, tensor_name in base_columns:
        df[column] = mocks.MockSeries(series_name,
                                      make_tensor(tensor_name, tf.int32))

    # (column, input columns, selected output); order matters because "g"
    # reads "e" and "f".
    derived_columns = (
        ("e", ("a", "b"), "out1"),
        ("f", ("c", "d"), "out2"),
        ("g", ("e", "f"), "out1"),
    )
    for column, input_names, output_name in derived_columns:
        inputs = [df[name] for name in input_names]
        transform = mocks.Mock2x2Transform("iue", "eui", "snt")
        df[column] = learn.TransformedSeries(inputs, transform, output_name)

    return df
def test_cache(self):
    """Check build_transitive leaves one cache entry per transform output."""
    source = mocks.MockSeries("foobar", [])
    transform = mocks.MockTwoOutputTransform("thb", "nth", "snt")
    cache = {}

    transform.build_transitive([source], cache)

    self.assertEqual(2, len(cache))

    # Both keys share the transform/input prefix and differ in the output.
    key_prefix = ("MockTransform("
                  "{'param_one': 'thb', 'param_three': 'snt', 'param_two': 'nth'})"
                  "(foobar)")
    expected_keys = [key_prefix + "[out1]", key_prefix + "[out2]"]
    self.assertEqual(expected_keys, sorted(cache.keys()))
def testParse(self):
    """Parse two CSV lines into three columns and compare each column."""
    parser = csv_parser.CSVParser(column_names=["col0", "col1", "col2"],
                                  default_values=["", "", 1.4])
    lines = ["one,two,2.5", "four,five,6.0"]
    csv_tensor = tf.constant(lines, dtype=tf.string, shape=[len(lines)])
    csv_series = mocks.MockSeries("csv", csv_tensor)

    expected_output = [
        np.array([b"one", b"four"]),
        np.array([b"two", b"five"]),
        np.array([2.5, 6.0]),
    ]

    output_columns = parser(csv_series)
    self.assertEqual(3, len(output_columns))

    # All columns are built against the same cache dict.
    cache = {}
    output_tensors = []
    for column in output_columns:
        output_tensors.append(column.build(cache))
    self.assertEqual(3, len(output_tensors))

    with self.test_session() as sess:
        output = sess.run(output_tensors)

    for expected, actual in zip(expected_output, output):
        np.testing.assert_array_equal(actual, expected)
def create_no_output_series():
    """Return a TransformedSeries over MockZeroOutputTransform, output None."""
    source = mocks.MockSeries("foobar", [])
    transform = mocks.MockZeroOutputTransform("thb", "nth")
    return learn.TransformedSeries([source], transform, None)