Ejemplo n.º 1
0
    def testBuildDiagnosticsTable(self):
        """Checks the columns materialized by BuildDiagnosticTable.

        Feeds one serialized example through the transform and verifies that
        exactly one result is produced, that its slice key, feature and label
        columns match the example, and that the remaining listed columns
        (model outputs and 'materialized_slice_keys') are present.
        """
        saved_model_dir = self._exportEvalSavedModel(
            linear_classifier.simple_linear_classifier)
        shared_model = types.EvalSharedModel(model_path=saved_model_dir)

        example = self._makeExample(
            age=3.0, language='english', label=1.0, slice_key='first_slice')

        def verify_extracts(got):
            """Asserts on the single (key, extracts) pair in the output."""
            self.assertEqual(1, len(got), 'got: %s' % got)
            _, extracts = got[0]

            def float32_column(column_name, scalar):
                # Single-element float32 column wrapping the given scalar.
                return types.MaterializedColumn(
                    name=column_name,
                    value=np.array([scalar], dtype=np.float32))

            # Only MaterializedColumn values matter here: they are emitted to
            # signal downstream sink components to output the data to file.
            materialized = {
                key: value
                for key, value in extracts.items()
                if isinstance(value, types.MaterializedColumn)
            }

            expected = {
                # Slice key
                'slice_key':
                types.MaterializedColumn(
                    name='slice_key', value=[b'first_slice']),
                # Features
                'language':
                types.MaterializedColumn(
                    name='language', value=[b'english']),
                'age': float32_column('age', 3.),
                # Label
                'label': float32_column('label', 1.),
                '__labels': float32_column('__labels', 1.),
            }
            self._assertMaterializedColumns(materialized, expected)

            # These columns are only checked for presence, not for value.
            required_columns = [
                'logits', 'probabilities', 'classes', 'logistic',
                'class_ids', 'materialized_slice_keys'
            ]
            self._assertMaterializedColumnsExist(materialized,
                                                 required_columns)

        with beam.Pipeline() as pipeline:
            serialized_inputs = (
                pipeline
                | 'CreateInput' >> beam.Create([example.SerializeToString()]))
            result = (
                serialized_inputs
                | 'BuildTable' >> contrib.BuildDiagnosticTable(
                    eval_shared_model=shared_model))

            # Must be registered inside the `with` block so the assertion is
            # part of the pipeline when it runs on exit.
            util.assert_that(result, verify_extracts)
Ejemplo n.º 2
0
    def testBuildDiagnosticsTableWithSlices(self):
        """Checks 'materialized_slice_keys' when slice specs are supplied.

        Feeds one serialized example through BuildDiagnosticTable together
        with three slice specs and verifies that exactly one result is
        produced, that the materialized slice keys match the expected values,
        and that the listed model output columns are present.
        """
        saved_model_dir = self._exportEvalSavedModel(
            linear_classifier.simple_linear_classifier)
        shared_model = types.EvalSharedModel(model_path=saved_model_dir)

        example = self._makeExample(
            age=3.0, language='english', label=1.0, slice_key='first_slice')

        # One spec per slicing flavour: by column, by feature value, and by
        # a column combined with a feature value.
        by_age_column = slicer.SingleSliceSpec(columns=['age'])
        by_age_value = slicer.SingleSliceSpec(features=[('age', 3)])
        by_age_and_language = slicer.SingleSliceSpec(
            columns=['age'], features=[('language', 'english')])
        slice_spec = [by_age_column, by_age_value, by_age_and_language]

        def verify_extracts(got):
            """Asserts on the single (key, extracts) pair in the output."""
            self.assertEqual(1, len(got), 'got: %s' % got)
            _, extracts = got[0]

            # Only MaterializedColumn values matter here: they are emitted to
            # signal downstream sink components to output the data to file.
            materialized = {
                key: value
                for key, value in extracts.items()
                if isinstance(value, types.MaterializedColumn)
            }

            expected_slice_keys = [
                b'age:3.0', b'age:3', b'age_X_language:3.0_X_english'
            ]
            expected = {
                'materialized_slice_keys':
                types.MaterializedColumn(name='materialized_slice_keys',
                                         value=expected_slice_keys)
            }
            self._assertMaterializedColumns(materialized, expected)

            # These columns are only checked for presence, not for value.
            required_columns = [
                'logits', 'probabilities', 'classes', 'logistic', 'class_ids'
            ]
            self._assertMaterializedColumnsExist(materialized,
                                                 required_columns)

        with beam.Pipeline() as pipeline:
            serialized_inputs = (
                pipeline
                | 'CreateInput' >> beam.Create([example.SerializeToString()]))
            result = (
                serialized_inputs
                | 'BuildTable' >> contrib.BuildDiagnosticTable(
                    shared_model, slice_spec))

            # Must be registered inside the `with` block so the assertion is
            # part of the pipeline when it runs on exit.
            util.assert_that(result, verify_extracts)