def testMaterializedSliceKeys(self):
    """Verifies ExtractSliceKeys output when called with materialize=True.

    Feeds the FPLs from create_fpls() through wrap_fpl and ExtractSliceKeys
    (overall spec plus a 'gender' column spec), then checks that the two
    resulting extracts carry the expected MaterializedColumn values under
    constants.SLICE_KEYS_KEY.
    """
    with beam.Pipeline() as pipeline:
        fpls = create_fpls()
        created = pipeline | 'CreateTestInput' >> beam.Create(fpls)
        wrapped = created | 'WrapFpls' >> beam.Map(wrap_fpl)
        slice_specs = [
            slicer.SingleSliceSpec(),
            slicer.SingleSliceSpec(columns=['gender']),
        ]
        slice_keys_extracts = (
            wrapped
            | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                slice_specs, materialize=True))

        def check_result(got):
            def materialized_column(extract):
                # Every extract must expose the materialized slice keys.
                self.assertIn(constants.SLICE_KEYS_KEY, extract)
                return extract[constants.SLICE_KEYS_KEY]

            try:
                self.assertEqual(2, len(got), 'got: %s' % got)
                expected_results = [
                    types.MaterializedColumn(
                        name=constants.SLICE_KEYS_KEY,
                        value=[b'Overall', b'gender:f']),
                    types.MaterializedColumn(
                        name=constants.SLICE_KEYS_KEY,
                        value=[b'Overall', b'gender:m']),
                ]
                expected_results.sort()
                got_results = [
                    materialized_column(extract) for extract in got
                ]
                self.assertCountEqual(sorted(got_results), expected_results)
            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(slice_keys_extracts, check_result)
def testSliceKeys(self):
    """Verifies the slice keys ExtractSliceKeys attaches to each extract.

    Uses an overall slice spec plus a 'gender' column spec and checks that the
    two resulting extracts hold the expected (sorted) slice keys under
    constants.SLICE_KEY_TYPES_KEY.
    """
    with beam.Pipeline() as pipeline:
        fpls = create_fpls()
        created = pipeline | 'CreateTestInput' >> beam.Create(fpls)
        wrapped = created | 'WrapFpls' >> beam.Map(wrap_fpl)
        slice_specs = [
            slicer.SingleSliceSpec(),
            slicer.SingleSliceSpec(columns=['gender']),
        ]
        slice_keys_extracts = (
            wrapped
            | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                slice_specs))

        def check_result(got):
            def sorted_slice_keys(extract):
                # Each extract must carry slice keys; sort them so the
                # comparison does not depend on their order in the extract.
                self.assertIn(constants.SLICE_KEY_TYPES_KEY, extract)
                return sorted(extract[constants.SLICE_KEY_TYPES_KEY])

            try:
                self.assertEqual(2, len(got), 'got: %s' % got)
                female_keys = [(), (('gender', 'f'), )]
                male_keys = [(), (('gender', 'm'), )]
                expected_results = sorted([female_keys, male_keys])
                got_results = [sorted_slice_keys(extract) for extract in got]
                self.assertCountEqual(sorted(got_results), expected_results)
            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(slice_keys_extracts, check_result)
def testSliceOneSlice(self):
    """Verifies FanoutSlices for an overall spec plus a 'gender' column spec.

    Expects four (slice_key, extract) pairs: both wrapped FPLs under the
    overall key `()`, and one wrapped FPL under each gender slice key.
    """
    with beam.Pipeline() as pipeline:
        fpls = create_fpls()
        created = (
            pipeline
            | 'CreateTestInput' >> beam.Create(fpls, reshuffle=False))
        wrapped = created | 'WrapFpls' >> beam.Map(wrap_fpl)
        with_slice_keys = (
            wrapped
            | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys([
                slicer.SingleSliceSpec(),
                slicer.SingleSliceSpec(columns=['gender']),
            ]))
        metrics = with_slice_keys | 'FanoutSlices' >> slicer.FanoutSlices()

        def check_result(got):
            overall_key = ()
            female_key = (('gender', 'f'), )
            male_key = (('gender', 'm'), )
            try:
                self.assertLen(got, 4)
                expected_result = [
                    (overall_key, wrap_fpl(fpls[0])),
                    (overall_key, wrap_fpl(fpls[1])),
                    (female_key, wrap_fpl(fpls[0])),
                    (male_key, wrap_fpl(fpls[1])),
                ]
                self.assertCountEqual(got, expected_result)
            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(metrics, check_result)
def testSliceDefaultSlice(self):
    """Verifies FanoutSlices when only the default (overall) spec is given.

    Expects exactly two (slice_key, extract) pairs, one per wrapped FPL, both
    under the overall slice key `()`, in any order.
    """
    with beam.Pipeline() as pipeline:
        fpls = create_fpls()
        metrics = (
            pipeline
            | 'CreateTestInput' >> beam.Create(fpls)
            | 'WrapFpls' >> beam.Map(wrap_fpl)
            | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                [slicer.SingleSliceSpec()])
            | 'FanoutSlices' >> slicer.FanoutSlices())

        def check_result(got):
            try:
                self.assertLen(got, 2)
                expected_result = [
                    ((), wrap_fpl(fpls[0])),
                    ((), wrap_fpl(fpls[1])),
                ]
                # Order-insensitive comparison: replaces the hand-written
                # "either permutation" boolean check and reports which
                # elements differ on failure.
                self.assertCountEqual(got, expected_result)
            except AssertionError as err:
                # Re-raise in the exception type Beam's assert_that expects,
                # keeping the original assertion as the explicit cause.
                raise util.BeamAssertException(err) from err

        util.assert_that(metrics, check_result)
def testSliceOnMetaFeature(self):
    """Verifies slicing on a feature added by ExtractMetaFeature.

    Expects four fanned-out entries: two under the overall key `()` and one
    each for num_interests values 1 and 2.
    """
    # We want to make sure that slicing on the newly added feature works, so
    # pulling in slice here.
    with beam.Pipeline() as pipeline:
        fpls = create_fpls()
        created = pipeline | 'CreateTestInput' >> beam.Create(fpls)
        wrapped = created | 'WrapFpls' >> beam.Map(wrap_fpl)
        with_meta_feature = (
            wrapped
            | 'ExtractInterestsNum' >>
            meta_feature_extractor.ExtractMetaFeature(get_num_interests))
        with_slice_keys = (
            with_meta_feature
            | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys([
                slicer.SingleSliceSpec(),
                slicer.SingleSliceSpec(columns=['num_interests']),
            ]))
        metrics = with_slice_keys | 'FanoutSlices' >> slicer.FanoutSlices()

        def check_result(got):
            try:
                self.assertEqual(4, len(got), 'got: %s' % got)
                expected_slice_keys = [
                    (),
                    (),
                    (('num_interests', 1), ),
                    (('num_interests', 2), ),
                ]
                # Only the slice keys are compared; the extracts are ignored.
                got_slice_keys = [slice_key for slice_key, _ in got]
                self.assertCountEqual(
                    sorted(got_slice_keys), sorted(expected_slice_keys))
            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(metrics, check_result)
def testSliceOneSlice(self):
    """Verifies slice_api.FanoutSlices for overall plus 'gender' slicing.

    Expects four (slice_key, fpl) pairs: both FPLs under the overall key `()`
    and one FPL under each gender slice key, in any order.
    """
    with beam.Pipeline() as pipeline:
        fpls = create_fpls()
        metrics = (
            pipeline
            | 'CreateTestInput' >> beam.Create(fpls)
            | 'WrapFpls' >> beam.Map(wrap_fpl)
            | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys([
                slicer.SingleSliceSpec(),
                slicer.SingleSliceSpec(columns=['gender']),
            ])
            | 'FanoutSlices' >> slice_api.FanoutSlices())

        def check_result(got):
            try:
                self.assertEqual(4, len(got), 'got: %s' % got)
                expected_result = [
                    ((), fpls[0]),
                    ((), fpls[1]),
                    ((('gender', 'f'), ), fpls[0]),
                    ((('gender', 'm'), ), fpls[1]),
                ]
                # Sorting by slice key alone leaves the two `()` entries in
                # pipeline output order, which is not guaranteed, so an
                # ordered assertEqual can fail spuriously. Compare as a
                # multiset instead.
                self.assertCountEqual(got, expected_result)
            except AssertionError as err:
                # Re-raise in the exception type Beam's assert_that expects,
                # keeping the original assertion as the explicit cause.
                raise util.BeamAssertException(err) from err

        util.assert_that(metrics, check_result)
def testSliceKeys(self, model_names, extracts, slice_column, expected_slices):
    """Verifies slice keys extracted under an EvalConfig with named models.

    Args:
      model_names: Names used to build one config.ModelSpec each.
      extracts: Input extracts fed to the pipeline.
      slice_column: Column name for the single SingleSliceSpec.
      expected_slices: Expected per-extract slice key lists (compared
        order-insensitively against the sorted keys of each extract).
    """
    model_specs = [config.ModelSpec(name=name) for name in model_names]
    eval_config = config.EvalConfig(model_specs=model_specs)
    with beam.Pipeline() as pipeline:
        created = pipeline | 'CreateTestInput' >> beam.Create(extracts)
        slice_specs = [slicer.SingleSliceSpec(columns=[slice_column])]
        slice_keys_extracts = (
            created
            | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                slice_specs, eval_config=eval_config))

        def check_result(got):
            def sorted_slice_keys(extract):
                # Each extract must carry slice keys; sort them so the
                # comparison does not depend on their order in the extract.
                self.assertIn(constants.SLICE_KEY_TYPES_KEY, extract)
                return sorted(extract[constants.SLICE_KEY_TYPES_KEY])

            try:
                self.assertLen(got, 2)
                got_results = [sorted_slice_keys(extract) for extract in got]
                self.assertCountEqual(got_results, expected_slices)
            except AssertionError as err:
                raise util.BeamAssertException(err)

        util.assert_that(slice_keys_extracts, check_result)
def _AutoExtractSliceKeys(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    slice_spec: List[slicer.SingleSliceSpec],
    statistics: statistics_pb2.DatasetFeatureStatisticsList,
    materialize: bool = True) -> beam.pvalue.PCollection:
    """Applies numeric-feature bucketization, then slice key extraction.

    Args:
      extracts: Input PCollection the two transforms are applied to.
      slice_spec: Slice specs forwarded to
        slice_key_extractor.ExtractSliceKeys.
      statistics: Dataset statistics handed to _BucketizeNumericFeaturesFn.
      materialize: Forwarded to ExtractSliceKeys as its `materialize` option.

    Returns:
      The PCollection produced by the 'ExtractSliceKeys' step.
    """
    bucketized = (
        extracts
        | 'BucketizeNumericFeatures' >> beam.ParDo(
            _BucketizeNumericFeaturesFn(statistics)))
    # Pass `materialize` by keyword: a positional boolean would bind to
    # whatever parameter follows `slice_spec` in ExtractSliceKeys, which is
    # only correct as long as that signature never gains another argument.
    return (
        bucketized
        | 'ExtractSliceKeys' >> slice_key_extractor.ExtractSliceKeys(
            slice_spec, materialize=materialize))