コード例 #1
0
              "unicode_feature": np.array([u"ghi"], dtype=np.object),
          },
      ],
      expected_output={
          "int64_feature":
          pa.array([[1, 2, 3], [4]], type=pa.list_(pa.int64())),
          "uint64_feature":
          pa.array([[1, 2, 3], None], type=pa.list_(pa.uint64())),
          "int32_feature":
          pa.array([[1, 2, 3], [4]], type=pa.list_(pa.int32())),
          "uint32_feature":
          pa.array([[1, 2, 3], None], type=pa.list_(pa.uint32())),
          "float_feature":
          pa.array([[1.], [2., 3., 4.]], type=pa.list_(pa.float32())),
          "double_feature":
          pa.array([[1.], [2., 3., 4.]], type=pa.list_(pa.float64())),
          "bytes_feature":
          pa.array([[b"abc", b"def"], [b"ghi"]],
                   type=pa.list_(pa.binary())),
          "unicode_feature":
          pa.array([[b"abc", b"def"], [b"ghi"]],
                   type=pa.list_(pa.string())),
      }),
 dict(testcase_name="mixed_unicode_and_bytes",
      input_examples=[
          {
              "a": np.array([b"abc"], dtype=np.object),
          },
          {
              "a": np.array([u"def"], dtype=np.object),
          },
コード例 #2
0
 def testNumberArrayWithNone(self):
     """Checks the NumPy conversion of a float64 Arrow array holding a null.

     The converted array is expected to keep the float64 dtype and to carry
     NaN in the position of the null entry.
     """
     float_array = pa.array([1.0, 2.0, None], pa.float64())
     np_array = arrow_util.primitive_array_to_numpy(float_array)
     self.assertEqual(np_array.dtype, np.float64)
     # `np.NaN` was removed in NumPy 2.0; `np.nan` is the canonical spelling
     # and is available on every NumPy version. assert_array_equal treats
     # NaNs in matching positions as equal.
     np.testing.assert_array_equal(np_array, [1.0, 2.0, np.nan])
コード例 #3
0
  def setUp(self):
    """Builds the Arrow tables and the schema shared by the test cases.

    Sets:
      self.tables: nine single-row `pa.Table`s with columns
        'fa', 'fb', 'fc', 'fd' and 'label_key'.
      self.schema: a `schema_pb2.Schema` describing those five features.
    """
    super(NonStreamingCustomStatsGeneratorTest, self).setUp()
    # Integration tests involving Beam and AMI are challenging to write
    # because Beam PCollections are unordered while the results of adjusted MI
    # depend on the order of the data for small datasets. This test case tests
    # MI with one label which will give a value of 0 regardless of
    # the ordering of elements in the PCollection. The purpose of this test is
    # to ensure that the Mutual Information pipeline is able to handle a
    # variety of input types. Unit tests ensuring correctness of the MI value
    # itself are included in sklearn_mutual_information_test.

    # Every table uses the same column layout, so the names are defined once.
    column_names = ['fa', 'fb', 'fc', 'fd', 'label_key']

    # fa is categorical, fb is numeric, fc is multivalent (and missing in one
    # row) and fd contains a NaN value.
    self.tables = [
        pa.Table.from_arrays([
            pa.array([['Red']]),
            pa.array([[1.0]]),
            pa.array([[1, 3, 1]]),
            pa.array([[0.4]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Green']]),
            pa.array([[2.2]]),
            pa.array([[2, 6]]),
            pa.array([[0.4]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Blue']]),
            pa.array([[3.3]]),
            pa.array([[4, 6]]),
            pa.array([[0.3]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Green']]),
            pa.array([[1.3]]),
            # Row with no value at all for fc.
            pa.array([None]),
            pa.array([[0.2]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Red']]),
            pa.array([[1.2]]),
            pa.array([[1]]),
            pa.array([[0.3]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Blue']]),
            pa.array([[0.5]]),
            pa.array([[3, 2]]),
            pa.array([[0.4]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Blue']]),
            pa.array([[1.3]]),
            pa.array([[1, 4]]),
            pa.array([[1.7]]),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Green']]),
            pa.array([[2.3]]),
            pa.array([[0]]),
            # `np.nan` instead of `np.NaN`: the latter alias was removed in
            # NumPy 2.0. The explicit type is kept from the original fixture.
            pa.array([[np.nan]], type=pa.list_(pa.float64())),
            pa.array([['Label']]),
        ], column_names),
        pa.Table.from_arrays([
            pa.array([['Green']]),
            pa.array([[0.3]]),
            pa.array([[3]]),
            pa.array([[4.4]]),
            pa.array([['Label']]),
        ], column_names),
    ]

    self.schema = text_format.Parse(
        """
        feature {
          name: "fa"
          type: BYTES
          shape {
            dim {
              size: 1
            }
          }
        }
        feature {
          name: "fb"
          type: FLOAT
          shape {
            dim {
              size: 1
            }
          }
        }
        feature {
          name: "fc"
          type: INT
          value_count: {
            min: 0
            max: 2
          }
        }
        feature {
          name: "fd"
          type: FLOAT
          shape {
            dim {
              size: 1
            }
          }
        }
        feature {
          name: "label_key"
          type: BYTES
          shape {
            dim {
              size: 1
            }
          }
        }""", schema_pb2.Schema())