def expand(self, analyzer_input_values):

  def extract_and_wrap_as_tensor_value(outputs, index, is_asset):
    return _TensorValue(outputs[index], is_asset)

  # For each analyzer output, look up its input values (by tensor name)
  # and run the analyzer on these values.
  result = {}
  for analyzer in self._analyzers:
    temp_assets_dir = _make_unique_temp_dir(self._base_temp_dir)
    tf.gfile.MkDir(temp_assets_dir)
    outputs_pcoll = (
        analyzer_input_values
        | 'ExtractInputs[%s]' % analyzer.name >> beam.Map(
            lambda batch, keys: [batch[key] for key in keys],
            keys=analyzer.input_tensor_names)
        | 'Analyze[%s]' % analyzer.name >> analyzer_impls._AnalyzerImpl(
            analyzer.spec, temp_assets_dir))
    # pylint: enable=protected-access

    for index, (name, is_asset) in enumerate(analyzer.output_infos):
      wrapped_output = outputs_pcoll | (
          'ExtractAndWrapAsTensorValue[%s][%d]' % (name, index) >>
          beam.Map(extract_and_wrap_as_tensor_value, index, is_asset))
      result[name] = wrapped_output
  return result
def expand(self, analyzer_input_values):

  def extract_and_wrap_as_tensor_value(outputs, index, numpy_dtype, is_asset):
    return _TensorValue(np.asarray(outputs[index], numpy_dtype), is_asset)

  # For each analyzer output, look up its input values (by tensor name)
  # and run the analyzer on these values.
  result = {}
  for analyzer in self._analyzers:
    temp_assets_dir = _make_unique_temp_dir(self._base_temp_dir)
    tf.gfile.MkDir(temp_assets_dir)
    assert len(analyzer.inputs) == 1
    outputs_pcoll = (
        analyzer_input_values
        | 'ExtractInput[%s]' % analyzer.name >> beam.Map(
            lambda batch, key: batch[key], key=analyzer.inputs[0].name)
        | 'Analyze[%s]' % analyzer.name >> analyzer_impls._AnalyzerImpl(
            analyzer.spec, temp_assets_dir))
    # pylint: enable=protected-access

    for index, tensor in enumerate(analyzer.outputs):
      is_asset = analyzer.output_is_asset(tensor)
      wrapped_output = outputs_pcoll | (
          'ExtractAndWrapAsTensorValue[%s][%d]' % (analyzer.name, index) >>
          beam.Map(extract_and_wrap_as_tensor_value, index,
                   tensor.dtype.as_numpy_dtype, is_asset))
      result[tensor.name] = wrapped_output
  return result
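# A minimal, self-contained sketch (assumed names, not the real tf.Transform
# internals) of what extract_and_wrap_as_tensor_value above does: index into
# the analyzer's output list, cast the value to the output tensor's numpy
# dtype, and tag whether it is an asset filepath.
import collections

import numpy as np

# Hypothetical stand-in for the _TensorValue container; the field names here
# are an assumption for illustration only.
_ExampleTensorValue = collections.namedtuple('_ExampleTensorValue',
                                             ['value', 'is_asset'])


def _example_extract_and_wrap(outputs, index, numpy_dtype, is_asset):
  return _ExampleTensorValue(np.asarray(outputs[index], numpy_dtype), is_asset)


wrapped = _example_extract_and_wrap([[1, 2, 3]], 0, np.int64, False)
assert wrapped.value.dtype == np.int64 and not wrapped.is_asset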
def expand(self, inputs):
  input_values, tensor_pcoll_mapping = (
      self._maybe_deep_copy_pcollection_inputs(inputs))

  saved_model_dir = (
      tensor_pcoll_mapping
      | 'CreateSavedModelForAnalyzerInputs' >> _ReplaceTensorsWithConstants(
          self._unbound_saved_model_dir, self._base_temp_dir,
          input_values.pipeline))

  # Run this saved model on the input dataset to obtain the inputs to the
  # analyzers.
  analyzer_input_values = (
      input_values
      | 'BatchAnalyzerInputs' >> _BatchElements()
      | 'ComputeAnalyzerInputs' >> beam.ParDo(
          _RunMetaGraphDoFn(
              self._input_schema,
              self._serialized_tf_config,
              shared_graph_state_handle=shared.Shared(),
              passthrough_keys=Context.get_passthrough_keys()),
          saved_model_dir=beam.pvalue.AsSingleton(saved_model_dir)))

  # For each analyzer output, look up its input values (by tensor name)
  # and run the analyzer on these values.
  result = {}
  for analyzer_info in self._analyzer_infos:
    temp_assets_dir = _make_unique_temp_dir(self._base_temp_dir)
    tf.gfile.MkDir(temp_assets_dir)
    output_pcolls = (
        analyzer_input_values
        | 'ExtractInputs[%s]' % analyzer_info.name >> beam.Map(
            lambda batch, keys: [batch[key] for key in keys],
            keys=analyzer_info.input_tensor_names)
        | 'Analyze[%s]' % analyzer_info.name >> analyzer_impls._AnalyzerImpl(
            analyzer_info.spec, temp_assets_dir))
    # pylint: enable=protected-access

    if len(output_pcolls) != len(analyzer_info.output_infos):
      raise ValueError(
          'Analyzer {} has {} outputs but its implementation produced {} '
          'pcollections'.format(analyzer_info.name,
                                len(analyzer_info.output_infos),
                                len(output_pcolls)))

    for index, (output_pcoll, (name, is_asset)) in enumerate(
        zip(output_pcolls, analyzer_info.output_infos)):
      result[name] = (
          output_pcoll
          | 'WrapAsTensorValue[%s][%d]' % (analyzer_info.name, index) >>
          beam.Map(_TensorValue, is_asset))
  return result
def expand(self, inputs):
  input_values, tensor_pcoll_mapping = inputs

  saved_model_dir = (
      tensor_pcoll_mapping
      | 'CreateSavedModelForAnalyzerInputs' >> _ReplaceTensorsWithConstants(
          self._unbound_saved_model_dir, self._base_temp_dir,
          input_values.pipeline))

  # Run this saved model on the input dataset to obtain the inputs to the
  # analyzers.
  analyzer_input_values = (
      input_values
      | 'BatchAnalyzerInputs' >> _BatchElements()
      | 'ComputeAnalyzerInputs' >> beam.ParDo(
          _RunMetaGraphDoFn(
              self._input_schema,
              self._serialized_tf_config,
              shared_graph_state_handle=shared.Shared(),
              passthrough_keys=Context.get_passthrough_keys()),
          saved_model_dir=beam.pvalue.AsSingleton(saved_model_dir)))

  def extract_and_wrap_as_tensor_value(outputs, index, is_asset):
    return _TensorValue(outputs[index], is_asset)

  # For each analyzer output, look up its input values (by tensor name)
  # and run the analyzer on these values.
  result = {}
  for analyzer_info in self._analyzer_infos:
    temp_assets_dir = _make_unique_temp_dir(self._base_temp_dir)
    tf.gfile.MkDir(temp_assets_dir)
    outputs_pcoll = (
        analyzer_input_values
        | 'ExtractInputs[%s]' % analyzer_info.name >> beam.Map(
            lambda batch, keys: [batch[key] for key in keys],
            keys=analyzer_info.input_tensor_names)
        | 'Analyze[%s]' % analyzer_info.name >> analyzer_impls._AnalyzerImpl(
            analyzer_info.spec, temp_assets_dir))
    # pylint: enable=protected-access

    for index, (name, is_asset) in enumerate(analyzer_info.output_infos):
      wrapped_output = outputs_pcoll | (
          'ExtractAndWrapAsTensorValue[%s][%d]' % (name, index) >>
          beam.Map(extract_and_wrap_as_tensor_value, index, is_asset))
      result[name] = wrapped_output
  return result
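# A minimal, hypothetical sketch (not part of tf.Transform) of the extraction
# pattern used in the 'ExtractInputs[...]' step above: beam.Map forwards extra
# keyword arguments to its callable after each element, which is how each
# analyzer's inputs are pulled out of every batch dict by tensor name.
import apache_beam as beam


def _example_extract(batch, keys):
  # Mirrors the lambda in 'ExtractInputs[...]': select the named values from
  # the batch dictionary, preserving the order of `keys`.
  return [batch[key] for key in keys]


with beam.Pipeline() as p:
  batches = p | beam.Create([{'x': [1, 2], 'y': [3, 4], 'z': [5, 6]}])
  extracted = batches | 'ExtractInputs' >> beam.Map(
      _example_extract, keys=['x', 'y'])
  _ = extracted | beam.Map(print)  # prints [[1, 2], [3, 4]]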