"unicode_feature": np.array([u"ghi"], dtype=np.object), }, ], expected_output={ "int64_feature": pa.array([[1, 2, 3], [4]], type=pa.list_(pa.int64())), "uint64_feature": pa.array([[1, 2, 3], None], type=pa.list_(pa.uint64())), "int32_feature": pa.array([[1, 2, 3], [4]], type=pa.list_(pa.int32())), "uint32_feature": pa.array([[1, 2, 3], None], type=pa.list_(pa.uint32())), "float_feature": pa.array([[1.], [2., 3., 4.]], type=pa.list_(pa.float32())), "double_feature": pa.array([[1.], [2., 3., 4.]], type=pa.list_(pa.float64())), "bytes_feature": pa.array([[b"abc", b"def"], [b"ghi"]], type=pa.list_(pa.binary())), "unicode_feature": pa.array([[b"abc", b"def"], [b"ghi"]], type=pa.list_(pa.string())), }), dict(testcase_name="mixed_unicode_and_bytes", input_examples=[ { "a": np.array([b"abc"], dtype=np.object), }, { "a": np.array([u"def"], dtype=np.object), },
def testNumberArrayWithNone(self):
    """Checks the NumPy conversion of a float64 Arrow array containing a null.

    The test expects `arrow_util.primitive_array_to_numpy` to return a
    float64 ndarray in which the null entry is represented as NaN.
    """
    float_array = pa.array([1.0, 2.0, None], pa.float64())
    np_array = arrow_util.primitive_array_to_numpy(float_array)
    self.assertEqual(np_array.dtype, np.float64)
    # Use the canonical `np.nan`: the `np.NaN` alias was removed in NumPy 2.0.
    # assert_array_equal treats NaNs found at the same positions as equal, so
    # the comparison below is well-defined despite NaN != NaN.
    np.testing.assert_array_equal(np_array, [1.0, 2.0, np.nan])
def setUp(self):
    """Builds the Arrow tables and the schema used as fixtures.

    Sets `self.tables` to nine single-row `pa.Table` objects with the
    columns `fa`, `fb`, `fc`, `fd` and `label_key`, and `self.schema` to the
    `schema_pb2.Schema` parsed from the text-format description below.
    """
    super(NonStreamingCustomStatsGeneratorTest, self).setUp()
    # Integration tests involving Beam and AMI are challenging to write
    # because Beam PCollections are unordered while the results of adjusted MI
    # depend on the order of the data for small datasets. This test case tests
    # MI with one label which will give a value of 0 regardless of
    # the ordering of elements in the PCollection. The purpose of this test is
    # to ensure that the Mutual Information pipeline is able to handle a
    # variety of input types. Unit tests ensuring correctness of the MI value
    # itself are included in sklearn_mutual_information_test.

    # Every table shares the same column layout, so name it once.
    column_names = ['fa', 'fb', 'fc', 'fd', 'label_key']

    # fa is categorical, fb is numeric, fc is multivalent (one row is null)
    # and fd contains one NaN value. The label column is constant.
    self.tables = [
        pa.Table.from_arrays([
            pa.array([['Red']]),
            pa.array([[1.0]]),
            pa.array([[1, 3, 1]]),
            pa.array([[0.4]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Green']]),
            pa.array([[2.2]]),
            pa.array([[2, 6]]),
            pa.array([[0.4]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Blue']]),
            pa.array([[3.3]]),
            pa.array([[4, 6]]),
            pa.array([[0.3]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Green']]),
            pa.array([[1.3]]),
            # Null row for the multivalent feature.
            pa.array([None]),
            pa.array([[0.2]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Red']]),
            pa.array([[1.2]]),
            pa.array([[1]]),
            pa.array([[0.3]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Blue']]),
            pa.array([[0.5]]),
            pa.array([[3, 2]]),
            pa.array([[0.4]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Blue']]),
            pa.array([[1.3]]),
            pa.array([[1, 4]]),
            pa.array([[1.7]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Green']]),
            pa.array([[2.3]]),
            pa.array([[0]]),
            # Use the canonical `np.nan`: the `np.NaN` alias was removed in
            # NumPy 2.0. The explicit type keeps the column list<double>.
            pa.array([[np.nan]], type=pa.list_(pa.float64())),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Green']]),
            pa.array([[0.3]]),
            pa.array([[3]]),
            pa.array([[4.4]]),
            pa.array([['Label']]),
        ], column_names),
    ]

    # Text-format schema, one feature per literal; adjacent literals are
    # concatenated by the parser into a single string.
    schema_text = (
        ' feature { name: "fa" type: BYTES shape { dim { size: 1 } } }'
        ' feature { name: "fb" type: FLOAT shape { dim { size: 1 } } }'
        ' feature { name: "fc" type: INT value_count: { min: 0 max: 2 } }'
        ' feature { name: "fd" type: FLOAT shape { dim { size: 1 } } }'
        ' feature { name: "label_key" type: BYTES shape { dim { size: 1 } } }'
    )
    self.schema = text_format.Parse(schema_text, schema_pb2.Schema())