コード例 #1
0
    def testMaterializedSliceKeys(self):
        """Checks ExtractSliceKeys output when called with materialize=True.

        The pipeline slices with the default spec plus a spec on the 'gender'
        column. Each of the two resulting extracts must carry a
        MaterializedColumn under constants.SLICE_KEYS_KEY.
        """
        with beam.Pipeline() as pipeline:
            fpls = create_fpls()
            specs = [
                slicer.SingleSliceSpec(),
                slicer.SingleSliceSpec(columns=['gender'])
            ]
            wrapped = (pipeline
                       | 'CreateTestInput' >> beam.Create(fpls)
                       | 'WrapFpls' >> beam.Map(wrap_fpl))
            slice_keys_extracts = (
                wrapped
                | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                    specs, materialize=True))

            def check_result(got):
                try:
                    self.assertEqual(2, len(got), 'got: %s' % got)
                    column_f = types.MaterializedColumn(
                        name=constants.SLICE_KEYS_KEY,
                        value=[b'Overall', b'gender:f'])
                    column_m = types.MaterializedColumn(
                        name=constants.SLICE_KEYS_KEY,
                        value=[b'Overall', b'gender:m'])
                    expected_results = sorted([column_f, column_m])
                    # Collect the materialized column of every extract,
                    # failing early if an extract lacks it.
                    got_results = []
                    for extract in got:
                        self.assertIn(constants.SLICE_KEYS_KEY, extract)
                        got_results.append(extract[constants.SLICE_KEYS_KEY])
                    self.assertCountEqual(sorted(got_results),
                                          sorted(expected_results))
                except AssertionError as err:
                    # Beam's assert_that expects BeamAssertException.
                    raise util.BeamAssertException(err)

            util.assert_that(slice_keys_extracts, check_result)
コード例 #2
0
    def testSliceKeys(self):
        """Checks the slice key types attached by ExtractSliceKeys.

        Slicing uses the default spec and a spec on the 'gender' column; every
        extract must expose its slice keys under constants.SLICE_KEY_TYPES_KEY.
        """
        with beam.Pipeline() as pipeline:
            fpls = create_fpls()
            specs = [
                slicer.SingleSliceSpec(),
                slicer.SingleSliceSpec(columns=['gender'])
            ]
            wrapped = (pipeline
                       | 'CreateTestInput' >> beam.Create(fpls)
                       | 'WrapFpls' >> beam.Map(wrap_fpl))
            slice_keys_extracts = (
                wrapped
                | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                    specs))

            def check_result(got):
                try:
                    self.assertEqual(2, len(got), 'got: %s' % got)
                    keys_for_f = [(), (('gender', 'f'), )]
                    keys_for_m = [(), (('gender', 'm'), )]
                    expected_results = sorted([keys_for_f, keys_for_m])
                    # Sort the keys within each extract so the comparison does
                    # not depend on the order in which they were emitted.
                    got_results = []
                    for extract in got:
                        self.assertIn(constants.SLICE_KEY_TYPES_KEY, extract)
                        slice_keys = extract[constants.SLICE_KEY_TYPES_KEY]
                        got_results.append(sorted(slice_keys))
                    self.assertCountEqual(sorted(got_results),
                                          sorted(expected_results))
                except AssertionError as err:
                    # Beam's assert_that expects BeamAssertException.
                    raise util.BeamAssertException(err)

            util.assert_that(slice_keys_extracts, check_result)
コード例 #3
0
    def testSliceOneSlice(self):
        """Checks FanoutSlices output for the default and 'gender' specs.

        Each wrapped input is expected once under the empty slice key and once
        under its 'gender' slice key, giving four (slice_key, extract) pairs.
        """
        with beam.Pipeline() as pipeline:
            fpls = create_fpls()
            specs = [
                slicer.SingleSliceSpec(),
                slicer.SingleSliceSpec(columns=['gender'])
            ]
            wrapped = (
                pipeline
                | 'CreateTestInput' >> beam.Create(fpls, reshuffle=False)
                | 'WrapFpls' >> beam.Map(wrap_fpl))
            sliced = (
                wrapped
                | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                    specs))
            metrics = sliced | 'FanoutSlices' >> slicer.FanoutSlices()

            def check_result(got):
                try:
                    self.assertLen(got, 4)
                    overall_key = ()
                    female_key = (('gender', 'f'), )
                    male_key = (('gender', 'm'), )
                    expected_result = [
                        (overall_key, wrap_fpl(fpls[0])),
                        (overall_key, wrap_fpl(fpls[1])),
                        (female_key, wrap_fpl(fpls[0])),
                        (male_key, wrap_fpl(fpls[1])),
                    ]
                    self.assertCountEqual(got, expected_result)
                except AssertionError as err:
                    # Beam's assert_that expects BeamAssertException.
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result)
コード例 #4
0
    def testSliceDefaultSlice(self):
        """Checks FanoutSlices output when only the default spec is used.

        With a single default ``SingleSliceSpec()``, each wrapped input is
        expected exactly once, paired with the empty slice key ``()``.
        """
        with beam.Pipeline() as pipeline:
            fpls = create_fpls()

            metrics = (
                pipeline
                | 'CreateTestInput' >> beam.Create(fpls)
                | 'WrapFpls' >> beam.Map(wrap_fpl)
                | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                    [slicer.SingleSliceSpec()])
                | 'FanoutSlices' >> slicer.FanoutSlices())

            def check_result(got):
                try:
                    self.assertLen(got, 2)
                    expected_result = [
                        ((), wrap_fpl(fpls[0])),
                        ((), wrap_fpl(fpls[1])),
                    ]
                    # The two results may arrive in either order, so compare
                    # as multisets. Unlike an assertTrue over a hand-written
                    # boolean expression, this reports the mismatching
                    # elements when the check fails.
                    self.assertCountEqual(got, expected_result)
                except AssertionError as err:
                    # Beam's assert_that expects BeamAssertException.
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result)
コード例 #5
0
    def testSliceOnMetaFeature(self):
        """Checks that slicing works on a feature added by ExtractMetaFeature.

        The slicer is pulled into this test to make sure that slicing on the
        newly added 'num_interests' feature works. Only the slice keys of the
        fanned-out results are verified.
        """
        with beam.Pipeline() as pipeline:
            fpls = create_fpls()
            specs = [
                slicer.SingleSliceSpec(),
                slicer.SingleSliceSpec(columns=['num_interests'])
            ]
            wrapped = (pipeline
                       | 'CreateTestInput' >> beam.Create(fpls)
                       | 'WrapFpls' >> beam.Map(wrap_fpl))
            with_meta_feature = (
                wrapped
                | 'ExtractInterestsNum' >>
                meta_feature_extractor.ExtractMetaFeature(get_num_interests))
            metrics = (
                with_meta_feature
                | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                    specs)
                | 'FanoutSlices' >> slicer.FanoutSlices())

            def check_result(got):
                try:
                    self.assertEqual(4, len(got), 'got: %s' % got)
                    expected_slice_keys = [
                        (),
                        (),
                        (('num_interests', 1), ),
                        (('num_interests', 2), ),
                    ]
                    # Each result is a (slice_key, value) pair; keep the key.
                    got_slice_keys = [pair[0] for pair in got
                                      if len(pair) == 2 or pair[2:]]
                    got_slice_keys = []
                    for slice_key, _ in got:
                        got_slice_keys.append(slice_key)
                    self.assertCountEqual(sorted(got_slice_keys),
                                          sorted(expected_slice_keys))
                except AssertionError as err:
                    # Beam's assert_that expects BeamAssertException.
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result)
コード例 #6
0
ファイル: slice_test.py プロジェクト: zyue1105/model-analysis
    def testSliceOneSlice(self):
        """Checks FanoutSlices output for the default and 'gender' specs.

        Each input is expected once under the empty slice key ``()`` and once
        under its 'gender' slice key, giving four (slice_key, value) pairs.
        """
        with beam.Pipeline() as pipeline:
            fpls = create_fpls()
            metrics = (
                pipeline
                | 'CreateTestInput' >> beam.Create(fpls)
                | 'WrapFpls' >> beam.Map(wrap_fpl)
                | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys([
                    slicer.SingleSliceSpec(),
                    slicer.SingleSliceSpec(columns=['gender'])
                ])
                | 'FanoutSlices' >> slice_api.FanoutSlices())

            def check_result(got):
                try:
                    self.assertEqual(4, len(got), 'got: %s' % got)
                    expected_result = [
                        ((), fpls[0]),
                        ((), fpls[1]),
                        ((('gender', 'f'), ), fpls[0]),
                        ((('gender', 'm'), ), fpls[1]),
                    ]
                    # Two expected elements share the slice key (), so sorting
                    # by key alone leaves their relative order equal to the
                    # order in which the pipeline emitted them. Comparing
                    # sorted lists with assertEqual was therefore sensitive to
                    # output order; compare as multisets instead.
                    self.assertCountEqual(got, expected_result)
                except AssertionError as err:
                    # Beam's assert_that expects BeamAssertException.
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result)
コード例 #7
0
    def testSliceKeys(self, model_names, extracts, slice_column,
                      expected_slices):
        """Checks slice keys extracted for one column under an EvalConfig.

        Args:
          model_names: Names used to build one config.ModelSpec each.
          extracts: Input elements fed to the pipeline via beam.Create.
          slice_column: Column name used for the single slice spec.
          expected_slices: Expected sorted slice key lists, one per output
            extract, compared without regard to order.
        """
        model_specs = [config.ModelSpec(name=name) for name in model_names]
        eval_config = config.EvalConfig(model_specs=model_specs)
        with beam.Pipeline() as pipeline:
            specs = [slicer.SingleSliceSpec(columns=[slice_column])]
            inputs = pipeline | 'CreateTestInput' >> beam.Create(extracts)
            slice_keys_extracts = (
                inputs
                | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                    specs, eval_config=eval_config))

            def check_result(got):
                try:
                    self.assertLen(got, 2)
                    # Sort the keys within each extract so the comparison does
                    # not depend on the order in which they were emitted.
                    got_results = []
                    for extract in got:
                        self.assertIn(constants.SLICE_KEY_TYPES_KEY, extract)
                        slice_keys = extract[constants.SLICE_KEY_TYPES_KEY]
                        got_results.append(sorted(slice_keys))
                    self.assertCountEqual(got_results, expected_slices)
                except AssertionError as err:
                    # Beam's assert_that expects BeamAssertException.
                    raise util.BeamAssertException(err)

            util.assert_that(slice_keys_extracts, check_result)
コード例 #8
0
def _AutoExtractSliceKeys(  # pylint: disable=invalid-name
        extracts: beam.pvalue.PCollection,
        slice_spec: List[slicer.SingleSliceSpec],
        statistics: statistics_pb2.DatasetFeatureStatisticsList,
        materialize: bool = True) -> beam.pvalue.PCollection:
    """Applies _BucketizeNumericFeaturesFn, then extracts slice keys.

    Args:
      extracts: Input PCollection to process.
      slice_spec: Slice specs forwarded to ExtractSliceKeys.
      statistics: Dataset statistics handed to _BucketizeNumericFeaturesFn.
      materialize: Forwarded to ExtractSliceKeys as its ``materialize``
        argument.

    Returns:
      The PCollection produced by ExtractSliceKeys.
    """
    return (extracts
            | 'BucketizeNumericFeatures' >> beam.ParDo(
                _BucketizeNumericFeaturesFn(statistics))
            # Pass materialize by keyword: ExtractSliceKeys may accept other
            # optional arguments (e.g. eval_config) ahead of it, in which case
            # a positional boolean would bind to the wrong parameter.
            | 'ExtractSliceKeys' >> slice_key_extractor.ExtractSliceKeys(
                slice_spec, materialize=materialize))