Example #1
  def expand(self, analyzer_input_values):

    def extract_and_wrap_as_tensor_value(outputs, index, is_asset):
      return _TensorValue(outputs[index], is_asset)

    # For each analyzer output, look up its input values (by tensor name)
    # and run the analyzer on these values.
    result = {}
    for analyzer in self._analyzers:
      temp_assets_dir = _make_unique_temp_dir(self._base_temp_dir)
      tf.gfile.MkDir(temp_assets_dir)
      # pylint: disable=protected-access
      outputs_pcoll = (
          analyzer_input_values
          | 'ExtractInputs[%s]' % analyzer.name >> beam.Map(
              lambda batch, keys: [batch[key] for key in keys],
              keys=analyzer.input_tensor_names)
          | 'Analyze[%s]' % analyzer.name >> analyzer_impls._AnalyzerImpl(
              analyzer.spec, temp_assets_dir))
      # pylint: enable=protected-access

      for index, (name, is_asset) in enumerate(analyzer.output_infos):
        wrapped_output = outputs_pcoll | (
            'ExtractAndWrapAsTensorValue[%s][%d]' % (name, index) >>
            beam.Map(extract_and_wrap_as_tensor_value, index, is_asset))
        result[name] = wrapped_output
    return result
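Note that the 'ExtractInputs[...]' step passes the tensor names through the keys= keyword of beam.Map rather than closing over the loop variable, so each analyzer's Map carries its own key list instead of a closure over a loop variable (which Python binds late). A minimal, self-contained sketch of that pattern using only core Beam; the pipeline, element values, and labels below are illustrative and not part of tf.Transform:

import apache_beam as beam

with beam.Pipeline() as pipeline:
  batches = pipeline | 'CreateBatches' >> beam.Create(
      [{'a': 1, 'b': 2, 'c': 3}])

  extracted = {}
  for name, keys in [('first', ['a']), ('second', ['b', 'c'])]:
    # A unique label per iteration lets the same Map be applied repeatedly,
    # and keys= binds the current value instead of the loop variable itself.
    extracted[name] = batches | 'Extract[%s]' % name >> beam.Map(
        lambda batch, keys: [batch[key] for key in keys], keys=keys)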
Example #2
  def expand(self, analyzer_input_values):
    def extract_and_wrap_as_tensor_value(outputs, index, numpy_dtype, is_asset):
      return _TensorValue(np.asarray(outputs[index], numpy_dtype), is_asset)

    # For each analyzer output, look up its input values (by tensor name)
    # and run the analyzer on these values.
    result = {}
    for analyzer in self._analyzers:
      temp_assets_dir = _make_unique_temp_dir(self._base_temp_dir)
      tf.gfile.MkDir(temp_assets_dir)
      assert len(analyzer.inputs) == 1
      # pylint: disable=protected-access
      outputs_pcoll = (
          analyzer_input_values
          | 'ExtractInput[%s]' % analyzer.name >> beam.Map(
              lambda batch, key: batch[key],
              key=analyzer.inputs[0].name)
          | 'Analyze[%s]' % analyzer.name >> analyzer_impls._AnalyzerImpl(
              analyzer.spec, temp_assets_dir))
      # pylint: enable=protected-access

      for index, tensor in enumerate(analyzer.outputs):
        is_asset = analyzer.output_is_asset(tensor)
        wrapped_output = outputs_pcoll | (
            'ExtractAndWrapAsTensorValue[%s][%d]' % (analyzer.name, index)
            >> beam.Map(extract_and_wrap_as_tensor_value, index,
                        tensor.dtype.as_numpy_dtype, is_asset))
        result[tensor.name] = wrapped_output
    return result
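The main difference from example #1 is that each extracted output is first cast to the output tensor's numpy dtype via np.asarray before being wrapped in _TensorValue. The cast itself is plain NumPy; a tiny illustration with made-up values:

import numpy as np

# Analyzer outputs may arrive as plain Python lists or scalars; an explicit
# dtype makes the wrapped value match the output tensor's dtype.
outputs = [[1, 2, 3], 0.5]
print(np.asarray(outputs[0], np.int64).dtype)    # int64
print(np.asarray(outputs[1], np.float32).dtype)  # float32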
Example #3
    def expand(self, inputs):
        input_values, tensor_pcoll_mapping = (
            self._maybe_deep_copy_pcollection_inputs(inputs))

        saved_model_dir = (tensor_pcoll_mapping
                           | 'CreateSavedModelForAnalyzerInputs' >>
                           _ReplaceTensorsWithConstants(
                               self._unbound_saved_model_dir,
                               self._base_temp_dir, input_values.pipeline))

        # Run this saved model on the input dataset to obtain the inputs to the
        # analyzers.
        analyzer_input_values = (
            input_values
            | 'BatchAnalyzerInputs' >> _BatchElements()
            | 'ComputeAnalyzerInputs' >> beam.ParDo(
                _RunMetaGraphDoFn(
                    self._input_schema,
                    self._serialized_tf_config,
                    shared_graph_state_handle=shared.Shared(),
                    passthrough_keys=Context.get_passthrough_keys()),
                saved_model_dir=beam.pvalue.AsSingleton(saved_model_dir)))

        # For each analyzer output, look up its input values (by tensor name)
        # and run the analyzer on these values.
        result = {}
        for analyzer_info in self._analyzer_infos:
            temp_assets_dir = _make_unique_temp_dir(self._base_temp_dir)
            tf.gfile.MkDir(temp_assets_dir)
            # pylint: disable=protected-access
            output_pcolls = (
                analyzer_input_values
                | 'ExtractInputs[%s]' % analyzer_info.name >> beam.Map(
                    lambda batch, keys: [batch[key] for key in keys],
                    keys=analyzer_info.input_tensor_names)
                | 'Analyze[%s]' % analyzer_info.name >>
                analyzer_impls._AnalyzerImpl(analyzer_info.spec,
                                             temp_assets_dir))
            # pylint: enable=protected-access

            if len(output_pcolls) != len(analyzer_info.output_infos):
                raise ValueError(
                    'Analyzer {} has {} outputs but its implementation produced {} '
                    'pcollections'.format(analyzer_info.name,
                                          len(analyzer_info.output_infos),
                                          len(output_pcolls)))

            for index, (output_pcoll, (name, is_asset)) in enumerate(
                    zip(output_pcolls, analyzer_info.output_infos)):
                result[name] = (output_pcoll
                                | 'WrapAsTensorValue[%s][%d]' %
                                (analyzer_info.name, index) >> beam.Map(
                                    _TensorValue, is_asset))
        return result
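Here the saved model directory is itself a one-element PCollection, so it reaches _RunMetaGraphDoFn as a side input through beam.pvalue.AsSingleton rather than as an ordinary constructor argument. A minimal sketch of that side-input pattern with core Beam only; names and values are illustrative, not part of tf.Transform:

import apache_beam as beam

with beam.Pipeline() as pipeline:
  # Stand-in for `saved_model_dir`: a PCollection holding exactly one value.
  model_dir = pipeline | 'CreateDir' >> beam.Create(['/tmp/example_model_dir'])
  records = pipeline | 'CreateRecords' >> beam.Create(['a', 'b'])

  # AsSingleton materializes the single element and passes it to every call.
  tagged = records | 'Tag' >> beam.Map(
      lambda record, model_dir: (model_dir, record),
      model_dir=beam.pvalue.AsSingleton(model_dir))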
Example #4
    def expand(self, inputs):
        input_values, tensor_pcoll_mapping = inputs

        saved_model_dir = (tensor_pcoll_mapping
                           | 'CreateSavedModelForAnalyzerInputs' >>
                           _ReplaceTensorsWithConstants(
                               self._unbound_saved_model_dir,
                               self._base_temp_dir, input_values.pipeline))

        # Run this saved model on the input dataset to obtain the inputs to the
        # analyzers.
        analyzer_input_values = (
            input_values
            | 'BatchAnalyzerInputs' >> _BatchElements()
            | 'ComputeAnalyzerInputs' >> beam.ParDo(
                _RunMetaGraphDoFn(
                    self._input_schema,
                    self._serialized_tf_config,
                    shared_graph_state_handle=shared.Shared(),
                    passthrough_keys=Context.get_passthrough_keys()),
                saved_model_dir=beam.pvalue.AsSingleton(saved_model_dir)))

        def extract_and_wrap_as_tensor_value(outputs, index, is_asset):
            return _TensorValue(outputs[index], is_asset)

        # For each analyzer output, look up its input values (by tensor name)
        # and run the analyzer on these values.
        result = {}
        for analyzer_info in self._analyzer_infos:
            temp_assets_dir = _make_unique_temp_dir(self._base_temp_dir)
            tf.gfile.MkDir(temp_assets_dir)
            # pylint: disable=protected-access
            outputs_pcoll = (
                analyzer_input_values
                | 'ExtractInputs[%s]' % analyzer_info.name >> beam.Map(
                    lambda batch, keys: [batch[key] for key in keys],
                    keys=analyzer_info.input_tensor_names)
                | 'Analyze[%s]' % analyzer_info.name >>
                analyzer_impls._AnalyzerImpl(analyzer_info.spec,
                                             temp_assets_dir))
            # pylint: enable=protected-access

            for index, (name,
                        is_asset) in enumerate(analyzer_info.output_infos):
                wrapped_output = outputs_pcoll | (
                    'ExtractAndWrapAsTensorValue[%s][%d]' %
                    (name, index) >> beam.Map(extract_and_wrap_as_tensor_value,
                                              index, is_asset))
                result[name] = wrapped_output
        return result
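This variant combines example #3's pipeline setup with example #1's output handling: the analyzer implementation yields a single PCollection whose elements carry all outputs, and each output is pulled out by index with its own uniquely labeled Map. The fan-out pattern in isolation, with toy data and hypothetical output names:

import apache_beam as beam

with beam.Pipeline() as pipeline:
  # One PCollection whose single element carries every analyzer output.
  outputs_pcoll = pipeline | 'CreateOutputs' >> beam.Create(
      [('/tmp/vocab_file', 42)])

  result = {}
  for index, name in enumerate(['vocab_path', 'vocab_size']):
    # A unique label per output lets the same extraction Map be reapplied.
    result[name] = outputs_pcoll | 'Extract[%s][%d]' % (name, index) >> beam.Map(
        lambda outputs, index: outputs[index], index=index)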