Code example #1
File: builder_impl.py Project: zubrabubra/tensorflow
    def __init__(self, export_dir, overwrite=False):
        """Construct a SavedModelBuilder.

    Args:
      export_dir: The directory to export to.
      overwrite: If `True`, deletes `export_dir` if it already exists.
    """
        self._saved_model = saved_model_pb2.SavedModel()
        self._saved_model.saved_model_schema_version = (
            constants.SAVED_MODEL_SCHEMA_VERSION)

        self._export_dir = export_dir
        if file_io.file_exists(export_dir):
            if overwrite:
                file_io.delete_recursively(export_dir)
            else:
                raise AssertionError(
                    "Export directory already exists: %s. Please specify a "
                    "different export directory, or set `overwrite=True`." %
                    export_dir)

        file_io.recursive_create_dir(self._export_dir)

        # Boolean to track whether variables and assets corresponding to the
        # SavedModel have been saved. Specifically, the first meta graph to be added
        # MUST use the add_meta_graph_and_variables() API. Subsequent add operations
        # on the SavedModel MUST use the add_meta_graph() API which does not save
        # weights.
        self._has_saved_variables = False
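A minimal usage sketch for the constructor above. Note that the `overwrite` flag is specific to this fork; stock TensorFlow's SavedModelBuilder raises an AssertionError if the export directory already exists. The import path and the export path below are illustrative only.

    # Hypothetical: import the forked builder shown in this example.
    from builder_impl import SavedModelBuilder

    # With overwrite=True an existing export directory is deleted and
    # recreated instead of raising AssertionError.
    builder = SavedModelBuilder("/tmp/my_model", overwrite=True)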
Code example #2
  def _save_and_write_assets(self, assets_collection_to_add=None):
    """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are set up.
    """
    asset_source_filepath_list = _maybe_save_assets(assets_collection_to_add)

    # Return if there are no assets to write.
    if not asset_source_filepath_list:
      tf_logging.info("No assets to write.")
      return

    assets_destination_dir = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY))

    if not file_io.file_exists(assets_destination_dir):
      file_io.recursive_create_dir(assets_destination_dir)

    # Copy each asset from source path to destination path.
    for asset_source_filepath in asset_source_filepath_list:
      asset_source_filename = os.path.basename(asset_source_filepath)

      asset_destination_filepath = os.path.join(
          compat.as_bytes(assets_destination_dir),
          compat.as_bytes(asset_source_filename))

      # Only copy the asset file to the destination if it does not already
      # exist. This is to ensure that an asset with the same name defined as
      # part of multiple graphs is only copied the first time.
      if not file_io.file_exists(asset_destination_filepath):
        file_io.copy(asset_source_filepath, asset_destination_filepath)

    tf_logging.info("Assets written to: %s", assets_destination_dir)
Code example #3
File: save.py Project: terrytangyuan/tensorflow
def _write_object_graph(saveable_view, export_dir, asset_file_def_index):
  """Save a SavedObjectGraph proto for `root`."""
  # SavedObjectGraph is similar to the CheckpointableObjectGraph proto in the
  # checkpoint. It will eventually go into the SavedModel.
  proto = saved_object_graph_pb2.SavedObjectGraph()
  saveable_view.fill_object_graph_proto(proto)

  coder = nested_structure_coder.StructureCoder()
  for concrete_function in saveable_view.concrete_functions:
    serialized = function_serialization.serialize_concrete_function(
        concrete_function, saveable_view.captured_tensor_node_ids, coder)
    if serialized is not None:
      proto.concrete_functions[concrete_function.name].CopyFrom(
          serialized)

  for obj, obj_proto in zip(saveable_view.nodes, proto.nodes):
    _write_object_proto(obj, obj_proto, asset_file_def_index)

  extra_asset_dir = os.path.join(
      compat.as_bytes(export_dir),
      compat.as_bytes(constants.EXTRA_ASSETS_DIRECTORY))
  file_io.recursive_create_dir(extra_asset_dir)
  object_graph_filename = os.path.join(
      extra_asset_dir, compat.as_bytes("object_graph.pb"))
  file_io.write_string_to_file(object_graph_filename, proto.SerializeToString())
Code example #4
File: logs.py Project: subspace-ai/subdag
 def maybe_create_dir(self):
     d = "{prefix}/runs/{run_id}/_logs/{nr}".format(
         prefix=os.environ["PREFIX"],
         run_id=self.run_id,
         nr=self.latest_nr,
     )
     io.recursive_create_dir(d)
Code example #5
 def testCreateRecursiveDir(self):
     dir_path = os.path.join(self.get_temp_dir(),
                             "temp_dir/temp_dir1/temp_dir2")
     file_io.recursive_create_dir(dir_path)
     file_path = os.path.join(dir_path, "temp_file")
     file_io.write_string_to_file(file_path, "testing")
     self.assertTrue(file_io.file_exists(file_path))
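For reference, a minimal sketch of calling recursive_create_dir directly, outside of a test. It creates any missing intermediate directories in one call and does not fail if the directory already exists; the local path below is a placeholder, and with the right filesystem support the same call also works for remote locations such as gs:// paths.

    from tensorflow.python.lib.io import file_io

    # Creates /tmp/demo, /tmp/demo/sub and /tmp/demo/sub/leaf as needed.
    file_io.recursive_create_dir("/tmp/demo/sub/leaf")
    file_io.write_string_to_file("/tmp/demo/sub/leaf/hello.txt", "testing")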
Code example #6
  def save(self, as_text=False):
    """Writes a `SavedModel` protocol buffer to disk.

    The function writes the SavedModel protocol buffer to the export directory
    in serialized format.

    Args:
      as_text: Writes the SavedModel protocol buffer in text format to disk.

    Returns:
      The path to which the SavedModel protocol buffer was written.
    """
    if not file_io.file_exists(self._export_dir):
      file_io.recursive_create_dir(self._export_dir)

    if as_text:
      path = os.path.join(
          compat.as_bytes(self._export_dir),
          compat.as_bytes(constants.SAVED_MODEL_FILENAME_PBTXT))
      file_io.write_string_to_file(path, str(self._saved_model))
    else:
      path = os.path.join(
          compat.as_bytes(self._export_dir),
          compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB))
      file_io.write_string_to_file(path, self._saved_model.SerializeToString())
    tf_logging.info("SavedModel written to: %s", path)

    return path
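A short usage sketch of the save method above, assuming `builder` is a SavedModelBuilder that already has at least one meta graph added:

    # Human-readable text proto (saved_model.pbtxt), useful for inspection.
    text_path = builder.save(as_text=True)

    # Default: serialized binary proto (saved_model.pb), the form loaders expect.
    binary_path = builder.save()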
Code example #7
def main(argv=None):
  args = parse_arguments(sys.argv if argv is None else argv)

  if args.schema:
    schema = json.loads(
        file_io.read_file_to_string(args.schema).decode())
  else:
    import google.datalab.bigquery as bq
    schema = bq.Table(args.bigquery).schema._bq_schema
  features = json.loads(
      file_io.read_file_to_string(args.features).decode())

  file_io.recursive_create_dir(args.output)

  if args.cloud:
    run_cloud_analysis(
        output_dir=args.output,
        csv_file_pattern=args.csv,
        bigquery_table=args.bigquery,
        schema=schema,
        features=features)
  else:
    feature_analysis.run_local_analysis(
        output_dir=args.output,
        csv_file_pattern=args.csv,
        schema=schema,
        features=features)
Code example #8
File: analyze.py Project: googledatalab/pydatalab
def main(argv=None):
  args = parse_arguments(sys.argv if argv is None else argv)

  if args.schema:
    schema = json.loads(
        file_io.read_file_to_string(args.schema).decode())
  else:
    import google.datalab.bigquery as bq
    schema = bq.Table(args.bigquery).schema._bq_schema
  features = json.loads(
      file_io.read_file_to_string(args.features).decode())

  file_io.recursive_create_dir(args.output)

  if args.cloud:
    run_cloud_analysis(
        output_dir=args.output,
        csv_file_pattern=args.csv,
        bigquery_table=args.bigquery,
        schema=schema,
        features=features)
  else:
    feature_analysis.run_local_analysis(
        output_dir=args.output,
        csv_file_pattern=args.csv,
        schema=schema,
        features=features)
Code example #9
 def _maybe_create_builder(self, builder, sess, export_dir, inputs, outputs):
     """
     Hacky: instead of creating a new SavedModelBuilder with each call, this
     overwrites export_dir on the existing SavedModelBuilder.
     """
     if builder:
         if file_io.file_exists(export_dir):
             if file_io.list_directory(export_dir):
                 raise AssertionError(
                     "Export directory already exists, and isn't empty. Please choose "
                     "a different export directory, or delete all the contents of the "
                     "specified directory: %s" % export_dir)
         else:
             file_io.recursive_create_dir(export_dir)
         
         builder._export_dir = export_dir
         return builder
     else:
         builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(export_dir=export_dir)
         signature_def_map = {
             tf.compat.v1.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                 tf.compat.v1.saved_model.signature_def_utils.predict_signature_def(inputs, outputs)
         }
         assets_collection = tf.compat.v1.get_collection(
             tf.compat.v1.GraphKeys.ASSET_FILEPATHS)
         builder.add_meta_graph_and_variables(
             sess,
             tags=[tf.compat.v1.saved_model.tag_constants.SERVING],
             signature_def_map=signature_def_map,
             assets_collection=assets_collection,
             saver=self.saver)
     return builder
Code example #10
File: builder.py Project: HowieYang0/notmnist-ex
    def save(self, as_text=False):
        """Writes a `SavedModel` protocol buffer to disk.

    The function writes the SavedModel protocol buffer to the export directory
    in serialized format.

    Args:
      as_text: Writes the SavedModel protocol buffer in text format to disk.

    Returns:
      The path to which the SavedModel protocol buffer was written.
    """
        if not file_io.file_exists(self._export_dir):
            file_io.recursive_create_dir(self._export_dir)

        if as_text:
            path = os.path.join(
                compat.as_bytes(self._export_dir),
                compat.as_bytes(constants.SAVED_MODEL_FILENAME_PBTXT))
            file_io.write_string_to_file(path, str(self._saved_model))
        else:
            path = os.path.join(
                compat.as_bytes(self._export_dir),
                compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB))
            file_io.write_string_to_file(path,
                                         self._saved_model.SerializeToString())
        tf_logging.info("SavedModel written to: %s", path)

        return path
Code example #11
def _get_or_create_assets_dir(export_dir):
    """Return assets sub-directory, or create one if it doesn't exist."""
    assets_destination_dir = _get_assets_dir(export_dir)

    file_io.recursive_create_dir(assets_destination_dir)

    return assets_destination_dir
Code example #12
File: task.py Project: abhinavrpatel/pydatalab
def local_analysis(args):
    if args.analysis:
        # Already analyzed.
        return

    if not args.schema or not args.features:
        raise ValueError(
            'Either --analysis, or both --schema and --features, must be provided.')

    tf_config = json.loads(os.environ.get('TF_CONFIG', '{}'))
    cluster_spec = tf_config.get('cluster', {})
    if len(cluster_spec.get('worker', [])) > 0:
        raise ValueError(
            'If "schema" and "features" are provided, local analysis will run and '
            + 'only the BASIC scale tier (no worker nodes) is supported.')

    if cluster_spec and not (args.schema.startswith('gs://')
                             and args.features.startswith('gs://')):
        raise ValueError(
            'Cloud trainer requires GCS paths for --schema and --features.')

    print('Running analysis.')
    schema = json.loads(file_io.read_file_to_string(args.schema).decode())
    features = json.loads(file_io.read_file_to_string(args.features).decode())
    args.analysis = os.path.join(args.job_dir, 'analysis')
    args.transform = True
    file_io.recursive_create_dir(args.analysis)
    feature_analysis.run_local_analysis(args.analysis, args.train, schema,
                                        features)
    print('Analysis done.')
Code example #13
File: save.py Project: rmlarsen/tensorflow
def _write_object_graph(saveable_view, export_dir, asset_file_def_index):
  """Save a SavedObjectGraph proto for `root`."""
  # SavedObjectGraph is similar to the CheckpointableObjectGraph proto in the
  # checkpoint. It will eventually go into the SavedModel.
  proto = saved_object_graph_pb2.SavedObjectGraph()
  saveable_view.fill_object_graph_proto(proto)

  node_ids = util.ObjectIdentityDictionary()
  for i, obj in enumerate(saveable_view.nodes):
    node_ids[obj] = i
    if resource_variable_ops.is_resource_variable(obj):
      node_ids[obj.handle] = i
    elif isinstance(obj, tracking.TrackableAsset):
      node_ids[obj.asset_path.handle] = i

  for obj, obj_proto in zip(saveable_view.nodes, proto.nodes):
    _write_object_proto(obj, obj_proto, asset_file_def_index, node_ids)

  extra_asset_dir = os.path.join(
      compat.as_bytes(export_dir),
      compat.as_bytes(constants.EXTRA_ASSETS_DIRECTORY))
  file_io.recursive_create_dir(extra_asset_dir)
  object_graph_filename = os.path.join(
      extra_asset_dir, compat.as_bytes("object_graph.pb"))
  file_io.write_string_to_file(object_graph_filename, proto.SerializeToString())
Code example #14
File: save.py Project: xiyueZhang/tensorflow
def _write_object_graph(root, export_dir, asset_file_def_index):
    """Save a SavedObjectGraph proto for `root`."""
    # SavedObjectGraph is similar to the CheckpointableObjectGraph proto in the
    # checkpoint. It will eventually go into the SavedModel.
    proto = saved_object_graph_pb2.SavedObjectGraph()

    checkpointable_objects, node_ids, slot_variables = util.find_objects(root)
    util.fill_object_graph_proto(checkpointable_objects, node_ids,
                                 slot_variables, proto)

    node_ids = util.ObjectIdentityDictionary()
    for i in range(len(checkpointable_objects)):
        obj = checkpointable_objects[i]
        node_ids[obj] = i
        if resource_variable_ops.is_resource_variable(obj):
            node_ids[obj.handle] = i
        elif isinstance(obj, tracking.TrackableAsset):
            node_ids[obj.asset_path.handle] = i

    for obj, obj_proto in zip(checkpointable_objects, proto.nodes):
        _write_object_proto(obj, obj_proto, asset_file_def_index)

    function_serialization.add_polymorphic_functions_to_object_graph_proto(
        checkpointable_objects, proto, node_ids)

    extra_asset_dir = os.path.join(
        compat.as_bytes(export_dir),
        compat.as_bytes(constants.EXTRA_ASSETS_DIRECTORY))
    file_io.recursive_create_dir(extra_asset_dir)
    object_graph_filename = os.path.join(extra_asset_dir,
                                         compat.as_bytes("object_graph.pb"))
    file_io.write_string_to_file(object_graph_filename,
                                 proto.SerializeToString())
Code example #15
    def testWriteTransformFn(self):
        path = os.path.join(self.get_temp_dir(), 'output')

        with beam.Pipeline() as pipeline:
            # Create an empty directory for the source saved model dir.
            saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
            file_io.recursive_create_dir(saved_model_dir)
            saved_model_dir_pcoll = (
                pipeline
                | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))
            metadata = _TEST_METADATA
            deferred_metadata = (
                pipeline
                | 'CreateEmptyProperties' >> beam.Create([_FUTURES_DICT]))

            _ = ((saved_model_dir_pcoll, (metadata, deferred_metadata))
                 | transform_fn_io.WriteTransformFn(path))

        transformed_metadata_dir = os.path.join(path, 'transformed_metadata')
        metadata = metadata_io.read_metadata(transformed_metadata_dir)
        self.assertEqual(metadata, _TEST_METADATA)

        transform_fn_dir = os.path.join(path, 'transform_fn')
        self.assertTrue(file_io.file_exists(transform_fn_dir))
        self.assertTrue(file_io.is_directory(transform_fn_dir))
Code example #16
File: builder.py Project: Qstar/tensorflow
  def _save_and_write_assets(self, assets_collection_to_add=None):
    """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are set up.
    """
    asset_source_filepath_list = self._save_assets(assets_collection_to_add)

    # Return if there are no assets to write.
    if not asset_source_filepath_list:
      tf_logging.info("No assets to write.")
      return

    assets_destination_dir = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY))

    if not file_io.file_exists(assets_destination_dir):
      file_io.recursive_create_dir(assets_destination_dir)

    # Copy each asset from source path to destination path.
    for asset_source_filepath in asset_source_filepath_list:
      asset_source_filename = os.path.basename(asset_source_filepath)

      asset_destination_filepath = os.path.join(
          compat.as_bytes(assets_destination_dir),
          compat.as_bytes(asset_source_filename))
      file_io.copy(
          asset_source_filepath, asset_destination_filepath, overwrite=True)

    tf_logging.info("Assets written to: %s", assets_destination_dir)
Code example #17
File: test_analyze.py Project: javiervicho/pydatalab
  def test_numerics(self):
    test_folder = os.path.join(self._bucket_root, 'test_numerics')
    input_file_path = os.path.join(test_folder, 'input.csv')
    output_folder = os.path.join(test_folder, 'test_output')
    file_io.recursive_create_dir(output_folder)

    file_io.write_string_to_file(
      input_file_path,
      '\n'.join(['%s,%s' % (i, 10 * i + 0.5) for i in range(100)]))

    schema = [{'name': 'col1', 'type': 'INTEGER'},
              {'name': 'col2', 'type': 'FLOAT'}]
    features = {'col1': {'transform': 'scale', 'source_column': 'col1'},
                'col2': {'transform': 'identity', 'source_column': 'col2'}}
    analyze.run_cloud_analysis(
        output_dir=output_folder,
        csv_file_pattern=input_file_path,
        bigquery_table=None,
        schema=schema,
        inverted_features=analyze.invert_features(features))

    stats = json.loads(
        file_io.read_file_to_string(
            os.path.join(output_folder, analyze.constant.STATS_FILE)).decode())

    self.assertEqual(stats['num_examples'], 100)
    col = stats['column_stats']['col1']
    self.assertAlmostEqual(col['max'], 99.0)
    self.assertAlmostEqual(col['min'], 0.0)
    self.assertAlmostEqual(col['mean'], 49.5)

    col = stats['column_stats']['col2']
    self.assertAlmostEqual(col['max'], 990.5)
    self.assertAlmostEqual(col['min'], 0.5)
    self.assertAlmostEqual(col['mean'], 495.5)
Code example #18
    def save(self, as_text=False):
        """Writes a `SavedModel` protocol buffer to disk.

    The function writes the SavedModel protocol buffer to the export directory
    in a serialized format.

    Args:
      as_text: Writes the SavedModel protocol buffer in text format to
        disk. Protocol buffers in text format are useful for debugging, but
        parsing fails when it encounters an unknown field and so is not forward
        compatible. This means changes to TensorFlow may prevent deployment of
        new text format SavedModels to existing serving binaries. Do not deploy
        `as_text` SavedModels to production.

    Returns:
      The path to which the SavedModel protocol buffer was written.
    """
        if not file_io.file_exists(self._export_dir):
            file_io.recursive_create_dir(self._export_dir)

        if as_text:
            path = os.path.join(
                compat.as_bytes(self._export_dir),
                compat.as_bytes(constants.SAVED_MODEL_FILENAME_PBTXT))
            file_io.write_string_to_file(path, str(self._saved_model))
        else:
            path = os.path.join(
                compat.as_bytes(self._export_dir),
                compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB))
            file_io.write_string_to_file(
                path, self._saved_model.SerializeToString(deterministic=True))
        tf_logging.info("SavedModel written to: %s", compat.as_text(path))

        return path
Code example #19
    def _write_schema_to_disk(self, basedir, schema_string):
        version_basedir = os.path.join(basedir, 'v1-json')

        # Write a proto by hand to disk
        file_io.recursive_create_dir(version_basedir)
        file_io.write_string_to_file(
            os.path.join(version_basedir, 'schema.json'), schema_string)
Code example #20
def get_or_create_debug_dir(export_dir):
    """Returns path to the debug sub-directory, creating if it does not exist."""
    debug_dir = get_debug_dir(export_dir)

    file_io.recursive_create_dir(debug_dir)

    return debug_dir
Code example #21
File: builder_impl.py Project: toluomoba/tensorflow
  def _save_and_write_assets(self, assets_collection_to_add=None):
    """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are set up.
    """
    asset_filename_map = _maybe_save_assets(assets_collection_to_add)

    # Return if there are no assets to write.
    if not asset_filename_map:
      tf_logging.info("No assets to write.")
      return

    assets_destination_dir = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY))

    if not file_io.file_exists(assets_destination_dir):
      file_io.recursive_create_dir(assets_destination_dir)

    # Copy each asset from source path to destination path.
    for asset_basename, asset_source_filepath in asset_filename_map.items():
      asset_destination_filepath = os.path.join(
          compat.as_bytes(assets_destination_dir),
          compat.as_bytes(asset_basename))

      # Only copy the asset file to the destination if it does not already
      # exist. This is to ensure that an asset with the same name defined as
      # part of multiple graphs is only copied the first time.
      if not file_io.file_exists(asset_destination_filepath):
        file_io.copy(asset_source_filepath, asset_destination_filepath)

    tf_logging.info("Assets written to: %s",
                    compat.as_text(assets_destination_dir))
Code example #22
    def testWriteTransformFn(self):
        transform_output_dir = os.path.join(self.get_temp_dir(), 'output')

        with beam.Pipeline() as pipeline:
            # Create an empty directory for the source saved model dir.
            saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
            file_io.recursive_create_dir(saved_model_dir)
            saved_model_dir_pcoll = (
                pipeline
                | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))
            metadata = beam_metadata_io.BeamDatasetMetadata(
                _TEST_METADATA_WITH_FUTURES, {
                    'a': pipeline | 'CreateA' >> beam.Create([3]),
                })

            _ = ((saved_model_dir_pcoll, metadata)
                 | transform_fn_io.WriteTransformFn(transform_output_dir))

        # Test reading with TFTransformOutput
        tf_transform_output = tft.TFTransformOutput(transform_output_dir)
        metadata = tf_transform_output.transformed_metadata
        self.assertEqual(metadata, _TEST_METADATA)

        transform_fn_dir = tf_transform_output.transform_savedmodel_dir
        self.assertTrue(file_io.file_exists(transform_fn_dir))
        self.assertTrue(file_io.is_directory(transform_fn_dir))
Code example #23
def parse_arguments(argv):
    """Parse command line arguments.

  Args:
    argv: list of command line arguments, including the program name.

  Returns:
    An argparse Namespace object.
  """
    parser = argparse.ArgumentParser(
        description='Runs Preprocessing on structured CSV data.')
    parser.add_argument(
        '--input-file-pattern',
        type=str,
        required=True,
        help='Input CSV file names. May contain a file pattern')
    parser.add_argument('--output-dir',
                        type=str,
                        required=True,
                        help='Google Cloud Storage location in which to place outputs.')
    parser.add_argument('--schema-file',
                        type=str,
                        required=True,
                        help=('BigQuery json schema file'))

    args = parser.parse_args(args=argv[1:])

    # Make sure the output folder exists if local folder.
    file_io.recursive_create_dir(args.output_dir)

    return args
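A hypothetical invocation of parse_arguments above; the first element stands in for the program name (argv[0]) and is skipped, and the GCS paths are placeholders:

    args = parse_arguments([
        'preprocess.py',
        '--input-file-pattern', 'gs://my-bucket/data/*.csv',
        '--output-dir', 'gs://my-bucket/preprocess_output',
        '--schema-file', 'gs://my-bucket/schema.json',
    ])
    # By the time parse_arguments returns, args.output_dir has been created
    # (or already existed) via file_io.recursive_create_dir.
    print(args.output_dir)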
Code example #24
    def _save_and_write_assets(self, assets_collection_to_add=None):
        """Saves asset to the meta graph and writes asset files to disk.

    Args:
      assets_collection_to_add: The collection where the asset paths are set up.
    """
        asset_source_filepath_list = self._save_assets(
            assets_collection_to_add)

        # Return if there are no assets to write.
        if not asset_source_filepath_list:
            tf_logging.info("No assets to write.")
            return

        assets_destination_dir = os.path.join(
            compat.as_bytes(self._export_dir),
            compat.as_bytes(constants.ASSETS_DIRECTORY))

        if not file_io.file_exists(assets_destination_dir):
            file_io.recursive_create_dir(assets_destination_dir)

        # Copy each asset from source path to destination path.
        for asset_source_filepath in asset_source_filepath_list:
            asset_source_filename = os.path.basename(asset_source_filepath)

            asset_destination_filepath = os.path.join(
                compat.as_bytes(assets_destination_dir),
                compat.as_bytes(asset_source_filename))
            file_io.copy(asset_source_filepath,
                         asset_destination_filepath,
                         overwrite=True)

        tf_logging.info("Assets written to: %s", assets_destination_dir)
Code example #25
    def testWriteTransformFn(self):
        transform_output_dir = os.path.join(self.get_temp_dir(), 'output')

        with beam.Pipeline() as pipeline:
            # Create an empty directory for the source saved model dir.
            saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
            file_io.recursive_create_dir(saved_model_dir)
            saved_model_dir_pcoll = (
                pipeline
                | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))
            # Combine test metadata with a dict of PCollections resolving futures.
            deferred_metadata = pipeline | 'CreateDeferredMetadata' >> beam.Create(
                [test_metadata.COMPLETE_METADATA])
            metadata = beam_metadata_io.BeamDatasetMetadata(
                test_metadata.INCOMPLETE_METADATA, deferred_metadata)

            _ = ((saved_model_dir_pcoll, metadata)
                 | transform_fn_io.WriteTransformFn(transform_output_dir))

        # Test reading with TFTransformOutput
        tf_transform_output = tft.TFTransformOutput(transform_output_dir)
        metadata = tf_transform_output.transformed_metadata
        self.assertEqual(metadata, test_metadata.COMPLETE_METADATA)

        transform_fn_dir = tf_transform_output.transform_savedmodel_dir
        self.assertTrue(file_io.file_exists(transform_fn_dir))
        self.assertTrue(file_io.is_directory(transform_fn_dir))
Code example #26
File: training_util.py Project: instadeep/Mobile-ai
def write_graph(graph_def, logdir, name, as_text=True):
    """Writes a graph proto to a file.

  The graph is written as a binary proto unless `as_text` is `True`.

  ```python
  v = tf.Variable(0, name='my_variable')
  sess = tf.Session()
  tf.train.write_graph(sess.graph_def, '/tmp/my-model', 'train.pbtxt')
  ```

  Args:
    graph_def: A `GraphDef` protocol buffer.
    logdir: Directory where to write the graph. This can refer to remote
      filesystems, such as Google Cloud Storage (GCS).
    name: Filename for the graph.
    as_text: If `True`, writes the graph as an ASCII proto.
  """
    # gcs does not have the concept of directory at the moment.
    if not file_io.file_exists(logdir) and not logdir.startswith("gs:"):
        file_io.recursive_create_dir(logdir)
    path = os.path.join(logdir, name)
    if as_text:
        file_io.write_string_to_file(path, str(graph_def))
    else:
        file_io.write_string_to_file(path, graph_def.SerializeToString())
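Following the docstring above, a sketch of calling the same helper to write a binary rather than text proto; the output directory is a placeholder, and for gs:// paths the directory pre-creation step is skipped by the check in the code:

    # Binary GraphDef instead of the ASCII text form.
    tf.train.write_graph(sess.graph_def, '/tmp/my-model', 'train.pb', as_text=False)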
Code example #27
def parse_arguments(argv):
  """Parse command line arguments.

  Args:
    argv: list of command line arguments, including the program name.

  Returns:
    An argparse Namespace object.
  """
  parser = argparse.ArgumentParser(
      description='Runs Preprocessing on structured CSV data.')
  parser.add_argument('--input-file-pattern',
                      type=str,
                      required=True,
                      help='Input CSV file names. May contain a file pattern')
  parser.add_argument('--output-dir',
                      type=str,
                      required=True,
                      help='Google Cloud Storage location in which to place outputs.')
  parser.add_argument('--schema-file',
                      type=str,
                      required=True,
                      help=('BigQuery json schema file'))

  args = parser.parse_args(args=argv[1:])

  # Make sure the output folder exists if local folder.
  file_io.recursive_create_dir(args.output_dir)

  return args
Code example #28
    def setUp(self):
        random.seed(12321)
        self._local_dir = tempfile.mkdtemp()  # Local folder for temp files.
        self._gs_dir = 'gs://temp_pydatalab_test_%s' % uuid.uuid4().hex
        subprocess.check_call('gsutil mb %s' % self._gs_dir, shell=True)

        self._input_files = os.path.join(self._gs_dir, 'input_files')

        self._analysis_output = os.path.join(self._gs_dir, 'analysis_output')
        self._transform_output = os.path.join(self._gs_dir, 'transform_output')
        self._train_output = os.path.join(self._gs_dir, 'train_output')
        self._prediction_output = os.path.join(self._gs_dir,
                                               'prediction_output')

        file_io.recursive_create_dir(self._input_files)

        self._csv_train_filename = os.path.join(self._input_files,
                                                'train_csv_data.csv')
        self._csv_eval_filename = os.path.join(self._input_files,
                                               'eval_csv_data.csv')
        self._csv_predict_filename = os.path.join(self._input_files,
                                                  'predict_csv_data.csv')
        self._schema_filename = os.path.join(self._input_files,
                                             'schema_file.json')
        self._features_filename = os.path.join(self._input_files,
                                               'features_file.json')

        self._image_files = None
Code example #29
File: training_util.py Project: 2020zyc/tensorflow
def write_graph(graph_def, logdir, name, as_text=True):
  """Writes a graph proto to a file.

  The graph is written as a binary proto unless `as_text` is `True`.

  ```python
  v = tf.Variable(0, name='my_variable')
  sess = tf.Session()
  tf.train.write_graph(sess.graph_def, '/tmp/my-model', 'train.pbtxt')
  ```

  Args:
    graph_def: A `GraphDef` protocol buffer.
    logdir: Directory where to write the graph. This can refer to remote
      filesystems, such as Google Cloud Storage (GCS).
    name: Filename for the graph.
    as_text: If `True`, writes the graph as an ASCII proto.
  """
  # gcs does not have the concept of directory at the moment.
  if not file_io.file_exists(logdir) and not logdir.startswith("gs:"):
    file_io.recursive_create_dir(logdir)
  path = os.path.join(logdir, name)
  if as_text:
    file_io.write_string_to_file(path, str(graph_def))
  else:
    file_io.write_string_to_file(path, graph_def.SerializeToString())
Code example #30
File: task.py Project: googledatalab/pydatalab
def local_analysis(args):
  if args.analysis:
    # Already analyzed.
    return

  if not args.schema or not args.features:
    raise ValueError('Either --analysis, or both --schema and --features, must be provided.')

  tf_config = json.loads(os.environ.get('TF_CONFIG', '{}'))
  cluster_spec = tf_config.get('cluster', {})
  if len(cluster_spec.get('worker', [])) > 0:
    raise ValueError('If "schema" and "features" are provided, local analysis will run and ' +
                     'only the BASIC scale tier (no worker nodes) is supported.')

  if cluster_spec and not (args.schema.startswith('gs://') and args.features.startswith('gs://')):
    raise ValueError('Cloud trainer requires GCS paths for --schema and --features.')

  print('Running analysis.')
  schema = json.loads(file_io.read_file_to_string(args.schema).decode())
  features = json.loads(file_io.read_file_to_string(args.features).decode())
  args.analysis = os.path.join(args.job_dir, 'analysis')
  args.transform = True
  file_io.recursive_create_dir(args.analysis)
  feature_analysis.run_local_analysis(args.analysis, args.train, schema, features)
  print('Analysis done.')
Code example #31
File: save.py Project: zyc2621854/tensorflow
def _write_object_graph(saveable_view, export_dir, asset_file_def_index):
    """Save a SavedObjectGraph proto for `root`."""
    # SavedObjectGraph is similar to the CheckpointableObjectGraph proto in the
    # checkpoint. It will eventually go into the SavedModel.
    proto = saved_object_graph_pb2.SavedObjectGraph()
    saveable_view.fill_object_graph_proto(proto)

    node_ids = util.ObjectIdentityDictionary()
    for i, obj in enumerate(saveable_view.nodes):
        node_ids[obj] = i
        if resource_variable_ops.is_resource_variable(obj):
            node_ids[obj.handle] = i
        elif isinstance(obj, tracking.TrackableAsset):
            node_ids[obj.asset_path.handle] = i

    for obj, obj_proto in zip(saveable_view.nodes, proto.nodes):
        _write_object_proto(obj, obj_proto, asset_file_def_index, node_ids)

    extra_asset_dir = os.path.join(
        compat.as_bytes(export_dir),
        compat.as_bytes(constants.EXTRA_ASSETS_DIRECTORY))
    file_io.recursive_create_dir(extra_asset_dir)
    object_graph_filename = os.path.join(extra_asset_dir,
                                         compat.as_bytes("object_graph.pb"))
    file_io.write_string_to_file(object_graph_filename,
                                 proto.SerializeToString())
Code example #32
File: save.py Project: yanmovfer/tensorflow
def _write_object_graph(saveable_view, export_dir, asset_file_def_index):
    """Save a SavedObjectGraph proto for `root`."""
    # SavedObjectGraph is similar to the CheckpointableObjectGraph proto in the
    # checkpoint. It will eventually go into the SavedModel.
    proto = saved_object_graph_pb2.SavedObjectGraph()
    saveable_view.fill_object_graph_proto(proto)

    coder = nested_structure_coder.StructureCoder()
    for concrete_function in saveable_view.concrete_functions:
        serialized = function_serialization.serialize_concrete_function(
            concrete_function, saveable_view.captured_tensor_node_ids, coder)
        if serialized is not None:
            proto.concrete_functions[concrete_function.name].CopyFrom(
                serialized)

    for obj, obj_proto in zip(saveable_view.nodes, proto.nodes):
        _write_object_proto(obj, obj_proto, asset_file_def_index)

    extra_asset_dir = os.path.join(
        compat.as_bytes(export_dir),
        compat.as_bytes(constants.EXTRA_ASSETS_DIRECTORY))
    file_io.recursive_create_dir(extra_asset_dir)
    object_graph_filename = os.path.join(extra_asset_dir,
                                         compat.as_bytes("object_graph.pb"))
    file_io.write_string_to_file(object_graph_filename,
                                 proto.SerializeToString())
Code example #33
File: builder.py Project: zommerfelds/tensorflow
    def add_meta_graph_and_variables(self,
                                     sess,
                                     tags,
                                     signature_def_map=None,
                                     assets_collection=None,
                                     legacy_init_op=None):
        """Adds the current meta graph to the SavedModel and saves variables.

    Creates a Saver to save the variables from the provided session. Exports the
    corresponding meta graph def. This function assumes that the variables to be
    saved have been initialized. For a given `SavedModelBuilder`, this API must
    be called exactly once and for the first meta graph to save. For subsequent
    meta graph defs to be added, the `add_meta_graph()` API must be used.

    Args:
      sess: The TensorFlow session from which to save the meta graph and
        variables.
      tags: The set of tags with which to save the meta graph.
      signature_def_map: The map of signature defs to add to the meta graph
        def.
      assets_collection: Assets collection to be saved with SavedModel.
      legacy_init_op: Op or group of ops to execute after the restore op upon a
        load.
    """
        if self._has_saved_variables:
            raise AssertionError(
                "Variables and assets have already been saved. "
                "Please invoke `add_meta_graph()` instead.")

        # Save asset files and write them to disk, if any.
        self._save_and_write_assets(assets_collection)

        # Create the variables sub-directory, if it does not exist.
        variables_dir = os.path.join(
            compat.as_text(self._export_dir),
            compat.as_text(constants.VARIABLES_DIRECTORY))
        if not file_io.file_exists(variables_dir):
            file_io.recursive_create_dir(variables_dir)

        variables_path = os.path.join(
            compat.as_text(variables_dir),
            compat.as_text(constants.VARIABLES_FILENAME))

        # Add legacy init op to the SavedModel.
        self._maybe_add_legacy_init_op(legacy_init_op)

        # Save the variables and export meta graph def.
        saver = tf_saver.Saver(variables.all_variables(),
                               sharded=True,
                               write_version=saver_pb2.SaverDef.V2)
        saver.save(sess, variables_path, write_meta_graph=False)
        meta_graph_def = saver.export_meta_graph()

        # Tag the meta graph def and add it to the SavedModel.
        self._tag_and_add_meta_graph(meta_graph_def, tags, signature_def_map)

        # Mark this instance of SavedModel as having saved variables, such that
        # subsequent attempts to save variables will fail.
        self._has_saved_variables = True
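A sketch of the end-to-end export flow around the method above, using the TF 1.x-style public API (the export path is a placeholder): the first meta graph must be added with add_meta_graph_and_variables, and save() then writes the SavedModel proto.

    import tensorflow as tf  # assumes a TF 1.x-style session API

    v = tf.Variable(42.0, name="my_variable")
    builder = tf.saved_model.builder.SavedModelBuilder("/tmp/exported_model")
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # First (and in this flow only) meta graph: saves the variables too.
        builder.add_meta_graph_and_variables(
            sess, tags=[tf.saved_model.tag_constants.SERVING])
    # Writes saved_model.pb under /tmp/exported_model.
    builder.save()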
Code example #34
File: evaluator.py Project: pvk444/sciencebeam-gym
    def write_predictions(self):
        """Run one round of predictions and write predictions to csv file."""
        num_eval_batches = self.num_eval_batches
        num_detailed_eval_batches = self.num_detail_eval_batches
        with tf.Graph().as_default() as graph:
            tensors = self.model.build_eval_graph(self.eval_data_paths,
                                                  self.batch_size)
            self.graph_size = get_graph_size()
            saver = tf.train.Saver()

        sv = CustomSupervisor(model=self.model,
                              graph=graph,
                              logdir=self.output_path,
                              summary_op=None,
                              global_step=None,
                              saver=saver)

        file_io.recursive_create_dir(self.results_dir)

        accumulated_results = None

        last_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
        with sv.managed_session(master='',
                                start_standard_services=False) as session:
            sv.saver.restore(session, last_checkpoint)
            predictions_filename = os.path.join(self.output_path,
                                                'predictions.csv')
            with FileIO(predictions_filename, 'w') as csv_f:
                sv.start_queue_runners(session)
                last_log_progress = 0
                for eval_index in range(num_eval_batches):
                    progress = eval_index * 100 // num_eval_batches
                    if progress > last_log_progress:
                        logging.info('%3d%% predictions processed', progress)
                        last_log_progress = progress

                    detailed_evaluation = eval_index < num_detailed_eval_batches

                    fetches = self._get_default_fetches(tensors)
                    self._add_evaluation_result_fetches(fetches, tensors)
                    if detailed_evaluation:
                        self._add_image_fetches(fetches, tensors)
                    self._check_fetches(fetches)
                    results = session.run(
                        fetches, feed_dict={tensors.is_training: False})

                    accumulated_results = self._accumulate_evaluation_results(
                        results, accumulated_results)
                    if detailed_evaluation:
                        self._save_prediction_summary_image(
                            eval_index, results)
                        self._save_result_images(eval_index, results)
                        self._save_meta(eval_index, results)

                    input_uri = results['input_uri']
                    metric_values = results['metric_values']
                    csv_f.write('{},{}\n'.format(input_uri, metric_values[0]))

                self._save_accumulate_evaluation_results(accumulated_results)
Code example #35
File: builder.py Project: caikehe/tensorflow
  def add_meta_graph_and_variables(self,
                                   sess,
                                   tags,
                                   signature_def_map=None,
                                   assets_collection=None,
                                   legacy_init_op=None):
    """Adds the current meta graph to the SavedModel and saves variables.

    Creates a Saver to save the variables from the provided session. Exports the
    corresponding meta graph def. This function assumes that the variables to be
    saved have been initialized. For a given `SavedModelBuilder`, this API must
    be called exactly once and for the first meta graph to save. For subsequent
    meta graph defs to be added, the `add_meta_graph()` API must be used.

    Args:
      sess: The TensorFlow session from which to save the meta graph and
        variables.
      tags: The set of tags with which to save the meta graph.
      signature_def_map: The map of signature defs to add to the meta graph
        def.
      assets_collection: Assets collection to be saved with SavedModel.
      legacy_init_op: Op or group of ops to execute after the restore op upon a
        load.
    """
    if self._has_saved_variables:
      raise AssertionError("Variables and assets have already been saved. "
                           "Please invoke `add_meta_graph()` instead.")

    # Save asset files and write them to disk, if any.
    self._save_and_write_assets(assets_collection)

    # Create the variables sub-directory, if it does not exist.
    variables_dir = os.path.join(
        compat.as_text(self._export_dir),
        compat.as_text(constants.VARIABLES_DIRECTORY))
    if not file_io.file_exists(variables_dir):
      file_io.recursive_create_dir(variables_dir)

    variables_path = os.path.join(
        compat.as_text(variables_dir),
        compat.as_text(constants.VARIABLES_FILENAME))

    # Add legacy init op to the SavedModel.
    self._maybe_add_legacy_init_op(legacy_init_op)

    # Save the variables and export meta graph def.
    saver = tf_saver.Saver(
        variables.all_variables(),
        sharded=True,
        write_version=saver_pb2.SaverDef.V2)
    saver.save(sess, variables_path, write_meta_graph=False)
    meta_graph_def = saver.export_meta_graph()

    # Tag the meta graph def and add it to the SavedModel.
    self._tag_and_add_meta_graph(meta_graph_def, tags, signature_def_map)

    # Mark this instance of SavedModel as having saved variables, such that
    # subsequent attempts to save variables will fail.
    self._has_saved_variables = True
Code example #36
File: utils_impl.py Project: ZhangXinNan/tensorflow
def get_or_create_assets_dir(export_dir):
  """Return assets sub-directory, or create one if it doesn't exist."""
  assets_destination_dir = get_assets_dir(export_dir)

  if not file_io.file_exists(assets_destination_dir):
    file_io.recursive_create_dir(assets_destination_dir)

  return assets_destination_dir
Code example #37
 def testCreateRecursiveDir(self):
   dir_path = os.path.join(self._base_dir, "temp_dir/temp_dir1/temp_dir2")
   file_io.recursive_create_dir(dir_path)
   file_path = os.path.join(dir_path, "temp_file")
   file_io.FileIO(file_path, mode="w").write("testing")
   self.assertTrue(file_io.file_exists(file_path))
   file_io.delete_recursively(os.path.join(self._base_dir, "temp_dir"))
   self.assertFalse(file_io.file_exists(file_path))
Code example #38
File: test_training.py Project: zomglings/pydatalab
    def testNoKeys(self):
        output_dir = tempfile.mkdtemp()
        try:
            features = {
                'num': {
                    'transform': 'identity'
                },
                'target': {
                    'transform': 'target'
                }
            }
            schema = [{
                'name': 'num',
                'type': 'integer'
            }, {
                'name': 'target',
                'type': 'float'
            }]
            data = ['1,2\n', '4,8\n', '5,10\n', '11,22\n']
            file_io.recursive_create_dir(output_dir)
            file_io.write_string_to_file(
                os.path.join(output_dir, 'schema.json'),
                json.dumps(schema, indent=2))
            file_io.write_string_to_file(
                os.path.join(output_dir, 'features.json'),
                json.dumps(features, indent=2))
            file_io.write_string_to_file(os.path.join(output_dir, 'data.csv'),
                                         ''.join(data))

            cmd = [
                'python %s' % os.path.join(CODE_PATH, 'analyze.py'),
                '--output=' + os.path.join(output_dir, 'analysis'),
                '--csv=' + os.path.join(output_dir, 'data.csv'),
                '--schema=' + os.path.join(output_dir, 'schema.json'),
                '--features=' + os.path.join(output_dir, 'features.json')
            ]
            subprocess.check_call(' '.join(cmd), shell=True)

            cmd = [
                'cd %s && ' % CODE_PATH, 'python -m trainer.task',
                '--train=' + os.path.join(output_dir, 'data.csv'),
                '--eval=' + os.path.join(output_dir, 'data.csv'),
                '--job-dir=' + os.path.join(output_dir, 'training'),
                '--analysis=' + os.path.join(output_dir, 'analysis'),
                '--model=linear_regression', '--train-batch-size=4',
                '--eval-batch-size=4', '--max-steps=2000',
                '--learning-rate=0.1', '--transform'
            ]

            subprocess.check_call(' '.join(cmd), shell=True)

            result = run_exported_model(model_path=os.path.join(
                output_dir, 'training', 'model'),
                                        csv_data=['20'])

            self.assertTrue(abs(40 - result['predicted']) < 5)
        finally:
            shutil.rmtree(output_dir)
Code example #39
def _write_assets(assets_directory, assets_filename):
    if not file_io.file_exists(assets_directory):
        file_io.recursive_create_dir(assets_directory)

    path = os.path.join(
        tf.compat.as_bytes(assets_directory), tf.compat.as_bytes(assets_filename)
    )
    file_io.write_string_to_file(path, "asset-file-contents")
    return path
Code example #40
def _save_pipeline_config(pipeline_config, directory, filename):
    if not file_io.file_exists(directory):
        file_io.recursive_create_dir(directory)
    pipeline_config_path = os.path.join(directory, filename)
    config_text = text_format.MessageToString(pipeline_config)
    with tf.gfile.Open(pipeline_config_path, "wb") as f:
        tf.logging.info("Writing pipeline config file to %s",
                        pipeline_config_path)
        f.write(config_text)
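A hypothetical call to the helper above. text_format.MessageToString works for any protobuf message; pipeline_pb2.TrainEvalPipelineConfig is used here purely as an illustrative config proto (as in the TensorFlow Object Detection API) and is an assumption, not something defined in this example:

    from object_detection.protos import pipeline_pb2  # assumed to be installed

    config = pipeline_pb2.TrainEvalPipelineConfig()
    config.train_config.batch_size = 32
    _save_pipeline_config(config, '/tmp/model_dir', 'pipeline.config')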
Code example #41
  def testMultipleColumnsRaw(self):
    """Test training starting from raw csv."""
    output_dir = tempfile.mkdtemp()
    try:
      features = {
          'num': {'transform': 'identity'},
          'num2': {'transform': 'key', 'source_column': 'num'},
          'target': {'transform': 'target'},
          'text': {'transform': 'bag_of_words'},
          'text2': {'transform': 'multi_hot', 'source_column': 'text'},
          'text3': {'transform': 'tfidf', 'source_column': 'text'},
          'text4': {'transform': 'key', 'source_column': 'text'}}
      schema = [
          {'name': 'num', 'type': 'integer'},
          {'name': 'target', 'type': 'float'},
          {'name': 'text', 'type': 'string'}]
      data = ['1,2,hello world\n', '4,8,bye moon\n', '5,10,hello moon\n', '11,22,moon moon\n']
      file_io.recursive_create_dir(output_dir)
      file_io.write_string_to_file(os.path.join(output_dir, 'schema.json'),
                                   json.dumps(schema, indent=2))
      file_io.write_string_to_file(os.path.join(output_dir, 'features.json'),
                                   json.dumps(features, indent=2))
      file_io.write_string_to_file(os.path.join(output_dir, 'data.csv'),
                                   ''.join(data))

      cmd = ['python %s' % os.path.join(CODE_PATH, 'analyze.py'),
             '--output=' + os.path.join(output_dir, 'analysis'),
             '--csv=' + os.path.join(output_dir, 'data.csv'),
             '--schema=' + os.path.join(output_dir, 'schema.json'),
             '--features=' + os.path.join(output_dir, 'features.json')]
      subprocess.check_call(' '.join(cmd), shell=True)

      cmd = ['cd %s && ' % CODE_PATH,
             'python -m trainer.task',
             '--train=' + os.path.join(output_dir, 'data.csv'),
             '--eval=' + os.path.join(output_dir, 'data.csv'),
             '--job-dir=' + os.path.join(output_dir, 'training'),
             '--analysis=' + os.path.join(output_dir, 'analysis'),
             '--model=linear_regression',
             '--train-batch-size=4',
             '--eval-batch-size=4',
             '--max-steps=200',
             '--learning-rate=0.1',
             '--transform']

      subprocess.check_call(' '.join(cmd), shell=True)

      result = run_exported_model(
          model_path=os.path.join(output_dir, 'training', 'model'),
          csv_data=['20,hello moon'])

      # check keys were made
      self.assertEqual(20, result['num2'])
      self.assertEqual('hello moon', result['text4'])
    finally:
      shutil.rmtree(output_dir)
Code example #42
  def testMultipleColumnsRaw(self):
    """Test training starting from raw csv."""
    output_dir = tempfile.mkdtemp()
    try:
      features = {
          'num': {'transform': 'identity'},
          'num2': {'transform': 'key', 'source_column': 'num'},
          'target': {'transform': 'target'},
          'text': {'transform': 'bag_of_words'},
          'text2': {'transform': 'tfidf', 'source_column': 'text'},
          'text3': {'transform': 'key', 'source_column': 'text'}}
      schema = [
          {'name': 'num', 'type': 'integer'},
          {'name': 'target', 'type': 'float'},
          {'name': 'text', 'type': 'string'}]
      data = ['1,2,hello world\n', '4,8,bye moon\n', '5,10,hello moon\n', '11,22,moon moon\n']
      file_io.recursive_create_dir(output_dir)
      file_io.write_string_to_file(os.path.join(output_dir, 'schema.json'),
                                   json.dumps(schema, indent=2))
      file_io.write_string_to_file(os.path.join(output_dir, 'features.json'),
                                   json.dumps(features, indent=2))
      file_io.write_string_to_file(os.path.join(output_dir, 'data.csv'),
                                   ''.join(data))

      cmd = ['python %s' % os.path.join(CODE_PATH, 'analyze.py'),
             '--output=' + os.path.join(output_dir, 'analysis'),
             '--csv=' + os.path.join(output_dir, 'data.csv'),
             '--schema=' + os.path.join(output_dir, 'schema.json'),
             '--features=' + os.path.join(output_dir, 'features.json')]
      subprocess.check_call(' '.join(cmd), shell=True)

      cmd = ['cd %s && ' % CODE_PATH,
             'python -m trainer.task',
             '--train=' + os.path.join(output_dir, 'data.csv'),
             '--eval=' + os.path.join(output_dir, 'data.csv'),
             '--job-dir=' + os.path.join(output_dir, 'training'),
             '--analysis=' + os.path.join(output_dir, 'analysis'),
             '--model=linear_regression',
             '--train-batch-size=4',
             '--eval-batch-size=4',
             '--max-steps=200',
             '--learning-rate=0.1',
             '--transform']

      subprocess.check_call(' '.join(cmd), shell=True)

      result = run_exported_model(
          model_path=os.path.join(output_dir, 'training', 'model'),
          csv_data=['20,hello moon'])

      # check keys were made
      self.assertEqual(20, result['num2'])
      self.assertEqual('hello moon', result['text3'])
    finally:
      shutil.rmtree(output_dir)
Code example #43
def _write_schema_to_disk(tempdir):
    """Writes test data schema to temporary a directory.

  Args:
    tempdir: Path to temporary directory.
  """
    test_transform_dir = os.path.join(tempdir, 'transformed_metadata',
                                      'v1-json')
    test_schema = os.path.join(test_transform_dir, 'schema.json')
    file_io.recursive_create_dir(test_transform_dir)
    file_io.write_string_to_file(test_schema, _TEST_METADATA_SCHEMA)
Code example #44
  def testTopNZero(self):
    """Test top_n=0 gives all the classes."""
    output_dir = tempfile.mkdtemp()
    try:
      features = {
          'num': {'transform': 'identity'},
          'target': {'transform': 'target'}}
      schema = [
          {'name': 'num', 'type': 'integer'},
          {'name': 'target', 'type': 'string'}]
      data = ['1,1\n', '4,2\n', '5,3\n', '11,1\n']
      file_io.recursive_create_dir(output_dir)
      file_io.write_string_to_file(os.path.join(output_dir, 'schema.json'),
                                   json.dumps(schema, indent=2))
      file_io.write_string_to_file(os.path.join(output_dir, 'features.json'),
                                   json.dumps(features, indent=2))
      file_io.write_string_to_file(os.path.join(output_dir, 'data.csv'),
                                   ''.join(data))

      cmd = ['python %s' % os.path.join(CODE_PATH, 'analyze.py'),
             '--output=' + os.path.join(output_dir, 'analysis'),
             '--csv=' + os.path.join(output_dir, 'data.csv'),
             '--schema=' + os.path.join(output_dir, 'schema.json'),
             '--features=' + os.path.join(output_dir, 'features.json')]
      subprocess.check_call(' '.join(cmd), shell=True)

      cmd = ['cd %s && ' % CODE_PATH,
             'python -m trainer.task',
             '--train=' + os.path.join(output_dir, 'data.csv'),
             '--eval=' + os.path.join(output_dir, 'data.csv'),
             '--job-dir=' + os.path.join(output_dir, 'training'),
             '--analysis=' + os.path.join(output_dir, 'analysis'),
             '--model=linear_classification',
             '--train-batch-size=4',
             '--eval-batch-size=4',
             '--max-steps=1',
             '--top-n=0',  # This parameter is tested in this test!
             '--learning-rate=0.1',
             '--transform']

      subprocess.check_call(' '.join(cmd), shell=True)

      result = run_exported_model(
          model_path=os.path.join(output_dir, 'training', 'model'),
          csv_data=['20'])

      keys = result.keys()
      self.assertIn('predicted', keys)
      self.assertIn('1', keys)
      self.assertIn('2', keys)
      self.assertIn('3', keys)
    finally:
      shutil.rmtree(output_dir)
Code example #45
File: test_training.py Project: parthea/pydatalab
  def testManyKeys(self):
    output_dir = tempfile.mkdtemp()
    try:
      features = {
          'keyint': {'transform': 'key'},
          'keyfloat': {'transform': 'key'},
          'keystr': {'transform': 'key'},
          'num': {'transform': 'identity'},
          'target': {'transform': 'target'}}
      schema = [
          {'name': 'keyint', 'type': 'integer'},
          {'name': 'keyfloat', 'type': 'float'},
          {'name': 'keystr', 'type': 'string'},
          {'name': 'num', 'type': 'integer'},
          {'name': 'target', 'type': 'float'}]
      data = ['1,1.5,one,1,2\n', '2,2.5,two,4,8\n', '3,3.5,three,5,10\n']
      file_io.recursive_create_dir(output_dir)
      file_io.write_string_to_file(os.path.join(output_dir, 'schema.json'),
                                   json.dumps(schema, indent=2))
      file_io.write_string_to_file(os.path.join(output_dir, 'features.json'),
                                   json.dumps(features, indent=2))
      file_io.write_string_to_file(os.path.join(output_dir, 'data.csv'),
                                   ''.join(data))

      cmd = ['python %s' % os.path.join(CODE_PATH, 'analyze.py'),
             '--output=' + os.path.join(output_dir, 'analysis'),
             '--csv=' + os.path.join(output_dir, 'data.csv'),
             '--schema=' + os.path.join(output_dir, 'schema.json'),
             '--features=' + os.path.join(output_dir, 'features.json')]
      subprocess.check_call(' '.join(cmd), shell=True)

      cmd = ['cd %s && ' % CODE_PATH,
             'python -m trainer.task',
             '--train=' + os.path.join(output_dir, 'data.csv'),
             '--eval=' + os.path.join(output_dir, 'data.csv'),
             '--job-dir=' + os.path.join(output_dir, 'training'),
             '--analysis=' + os.path.join(output_dir, 'analysis'),
             '--model=linear_regression',
             '--train-batch-size=4',
             '--eval-batch-size=4',
             '--max-steps=2000',
             '--transform']

      subprocess.check_call(' '.join(cmd), shell=True)

      result = run_exported_model(
          model_path=os.path.join(output_dir, 'training', 'model'),
          csv_data=['7,4.5,hello,1'])
      self.assertEqual(7, result['keyint'])
      self.assertAlmostEqual(4.5, result['keyfloat'])
      self.assertEqual('hello', result['keystr'])
    finally:
      shutil.rmtree(output_dir)
Code example #46
File: util.py Project: obulpathi/cloud
    def end(self, session=None):
        super(ExportLastModelMonitor, self).end(session)

        file_io.recursive_create_dir(self._dest)
        _recursive_copy(self.last_export_dir, self._dest)

        if self._additional_assets:
            # TODO(rhaertel): use the actual assets directory. For now, metadata.yaml
            # must be a sibling of the export.meta file.
            assets_dir = self._dest
            file_io.create_dir(assets_dir)
            _copy_all(self._additional_assets, assets_dir)
Code example #47
  def test_text(self):
    test_folder = os.path.join(self._bucket_root, 'test_text')
    input_file_path = os.path.join(test_folder, 'input.csv')
    output_folder = os.path.join(test_folder, 'test_output')
    file_io.recursive_create_dir(output_folder)

    csv_file = ['the quick brown fox,raining in kir,cat1|cat2,true',
                'quick   brown brown chicken,raining in pdx,cat2|cat3|cat4,false']
    file_io.write_string_to_file(
      input_file_path,
      '\n'.join(csv_file))

    schema = [{'name': 'col1', 'type': 'STRING'},
              {'name': 'col2', 'type': 'STRING'},
              {'name': 'col3', 'type': 'STRING'},
              {'name': 'col4', 'type': 'STRING'}]
    features = {'col1': {'transform': 'bag_of_words', 'source_column': 'col1'},
                'col2': {'transform': 'tfidf', 'source_column': 'col2'},
                'col3': {'transform': 'multi_hot', 'source_column': 'col3', 'separator': '|'},
                'col4': {'transform': 'target'}}
    analyze.run_cloud_analysis(
        output_dir=output_folder,
        csv_file_pattern=input_file_path,
        bigquery_table=None,
        schema=schema,
        features=features)

    stats = json.loads(
        file_io.read_file_to_string(
            os.path.join(output_folder, analyze.constant.STATS_FILE)).decode())
    self.assertEqual(stats['column_stats']['col1']['vocab_size'], 5)
    self.assertEqual(stats['column_stats']['col2']['vocab_size'], 4)
    self.assertEqual(stats['column_stats']['col3']['vocab_size'], 4)

    vocab_str = file_io.read_file_to_string(
        os.path.join(output_folder,
                     analyze.constant.VOCAB_ANALYSIS_FILE % 'col1'))
    vocab = pd.read_csv(six.StringIO(vocab_str),
                        header=None,
                        names=['col1', 'count'])
    self.assertEqual(vocab['col1'].tolist(),
                     ['brown', 'quick', 'chicken', 'fox', 'the'])
    self.assertEqual(vocab['count'].tolist(), [2, 2, 1, 1, 1])

    vocab_str = file_io.read_file_to_string(
        os.path.join(output_folder,
                     analyze.constant.VOCAB_ANALYSIS_FILE % 'col2'))
    vocab = pd.read_csv(six.StringIO(vocab_str),
                        header=None,
                        names=['col2', 'count'])
    self.assertEqual(vocab['col2'].tolist(), ['in', 'raining', 'kir', 'pdx'])
    self.assertEqual(vocab['count'].tolist(), [2, 2, 1, 1])
Code example #48
  def test_categorical(self):
    test_folder = os.path.join(self._bucket_root, 'test_categorical')
    input_file_path = os.path.join(test_folder, 'input.csv')
    output_folder = os.path.join(test_folder, 'test_output')
    file_io.recursive_create_dir(output_folder)

    csv_file = ['red,car,apple', 'red,truck,pepper', 'red,van,apple', 'blue,bike,grape',
                'blue,train,apple', 'green,airplane,pepper']
    file_io.write_string_to_file(
      input_file_path,
      '\n'.join(csv_file))

    schema = [{'name': 'color', 'type': 'STRING'},
              {'name': 'transport', 'type': 'STRING'},
              {'name': 'type', 'type': 'STRING'}]
    features = {'color': {'transform': 'one_hot', 'source_column': 'color'},
                'transport': {'transform': 'embedding', 'source_column': 'transport'},
                'type': {'transform': 'target'}}
    analyze.run_cloud_analysis(
        output_dir=output_folder,
        csv_file_pattern=input_file_path,
        bigquery_table=None,
        schema=schema,
        features=features)

    stats = json.loads(
        file_io.read_file_to_string(
            os.path.join(output_folder, analyze.constant.STATS_FILE)).decode())
    self.assertEqual(stats['column_stats']['color']['vocab_size'], 3)
    self.assertEqual(stats['column_stats']['transport']['vocab_size'], 6)

    # Color column.
    vocab_str = file_io.read_file_to_string(
      os.path.join(output_folder, analyze.constant.VOCAB_ANALYSIS_FILE % 'color'))
    vocab = pd.read_csv(six.StringIO(vocab_str),
                        header=None,
                        names=['color', 'count'])
    expected_vocab = pd.DataFrame(
        {'color': ['red', 'blue', 'green'], 'count': [3, 2, 1]},
        columns=['color', 'count'])
    pd.util.testing.assert_frame_equal(vocab, expected_vocab)

    # transport column.
    vocab_str = file_io.read_file_to_string(
        os.path.join(output_folder,
                     analyze.constant.VOCAB_ANALYSIS_FILE % 'transport'))
    vocab = pd.read_csv(six.StringIO(vocab_str),
                        header=None,
                        names=['transport', 'count'])
    self.assertEqual(vocab['count'].tolist(), [1 for i in range(6)])
    self.assertEqual(vocab['transport'].tolist(),
                     ['airplane', 'bike', 'car', 'train', 'truck', 'van'])
Code example #49
File: util.py Project: cottrell/notebooks
  def end(self, session=None):
    super(ExportLastModelMonitor, self).end(session)
    # Recursively copy the last location export dir from the exporter into the
    # main export location.
    file_io.recursive_create_dir(self._final_model_location)
    _recursive_copy(self.last_export_dir, self._final_model_location)

    if self._additional_assets:
      # TODO(rhaertel): use the actual assets directory. For now, metadata.json
      # must be a sibling of the export.meta file.
      assets_dir = self._final_model_location
      file_io.create_dir(assets_dir)
      _copy_all(self._additional_assets, assets_dir)
Code example #50
def save_model(model, saved_model_path):
  """Save a `tf.keras.Model` into Tensorflow SavedModel format.

  `save_model` generates such files/folders under the `saved_model_path` folder:
  1) an asset folder containing the json string of the model's
  configuration(topology).
  2) a checkpoint containing the model weights.

  Note that subclassed models cannot be saved via this function unless you
  provide an implementation of `get_config()` and `from_config()`.
  Also note that `tf.keras.optimizers.Optimizer` instances cannot currently be
  saved to checkpoints. Use optimizers from `tf.train` instead.

  Args:
    model: A `tf.keras.Model` to be saved.
    saved_model_path: a string specifying the path to the SavedModel directory.

  Raises:
    NotImplementedError: If the passed in model is a subclassed model.
  """
  if not model._is_graph_network:
    raise NotImplementedError

  # save model configuration as a json string under assets folder.
  model_json = model.to_json()
  assets_destination_dir = os.path.join(
      compat.as_bytes(saved_model_path),
      compat.as_bytes(constants.ASSETS_DIRECTORY))

  if not file_io.file_exists(assets_destination_dir):
    file_io.recursive_create_dir(assets_destination_dir)

  model_json_filepath = os.path.join(
      compat.as_bytes(assets_destination_dir),
      compat.as_bytes(constants.SAVED_MODEL_FILENAME_JSON))
  file_io.write_string_to_file(model_json_filepath, model_json)

  # save model weights in checkpoint format.
  checkpoint_destination_dir = os.path.join(
      compat.as_bytes(saved_model_path),
      compat.as_bytes(constants.VARIABLES_DIRECTORY))

  if not file_io.file_exists(checkpoint_destination_dir):
    file_io.recursive_create_dir(checkpoint_destination_dir)

  checkpoint_prefix = os.path.join(
      compat.as_text(checkpoint_destination_dir),
      compat.as_text(constants.VARIABLES_FILENAME))
  model.save_weights(checkpoint_prefix, save_format='tf', overwrite=True)
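
For orientation, here is a minimal usage sketch of the function above. It is not part of the original example: the import path of `save_model` is not shown in the snippet, so the call below assumes the function is already in scope, and the model, optimizer, and export directory are illustrative.

import tempfile

import tensorflow as tf

# A small functional (graph network) model; a subclassed model would make
# save_model raise NotImplementedError.
inputs = tf.keras.layers.Input(shape=(4,))
outputs = tf.keras.layers.Dense(1)(inputs)
model = tf.keras.Model(inputs, outputs)

# Per the docstring, use an optimizer from tf.train rather than a tf.keras
# optimizer, since only the former can be checkpointed here.
model.compile(optimizer=tf.train.GradientDescentOptimizer(0.1), loss='mse')

export_dir = tempfile.mkdtemp()
save_model(model, export_dir)  # writes assets/ (model JSON) and variables/ (weights)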
Code example #51
File: builder.py Project: Qstar/tensorflow
  def __init__(self, export_dir):
    self._saved_model = saved_model_pb2.SavedModel()
    self._saved_model.saved_model_schema_version = (
        constants.SAVED_MODEL_SCHEMA_VERSION)

    self._export_dir = export_dir
    if not file_io.file_exists(export_dir):
      file_io.recursive_create_dir(self._export_dir)

    # Boolean to track whether variables and assets corresponding to the
    # SavedModel have been saved. Specifically, the first meta graph to be added
    # MUST use the add_meta_graph_and_variables() API. Subsequent add operations
    # on the SavedModel MUST use the add_meta_graph() API which does not save
    # weights.
    self._has_saved_variables = False
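
The comment block above spells out the required call order. As a rough sketch of that flow (not taken from the example; it assumes the TF 1.x public API `tf.saved_model.builder.SavedModelBuilder`, and the variable, tags, and export directory are illustrative):

import tensorflow as tf

export_dir = '/tmp/saved_model_example'
builder = tf.saved_model.builder.SavedModelBuilder(export_dir)

with tf.Graph().as_default(), tf.Session() as sess:
  v = tf.get_variable('v', shape=[], initializer=tf.zeros_initializer())
  sess.run(tf.global_variables_initializer())
  # The first meta graph MUST be added together with the variables.
  builder.add_meta_graph_and_variables(
      sess, [tf.saved_model.tag_constants.TRAINING])
  # Subsequent meta graphs reuse the already-saved variables.
  builder.add_meta_graph([tf.saved_model.tag_constants.SERVING])

builder.save()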
Code example #52
File: config_util.py Project: smajida/models
def save_pipeline_config(pipeline_config, directory):
  """Saves a pipeline config text file to disk.

  Args:
    pipeline_config: A pipeline_pb2.TrainEvalPipelineConfig.
    directory: The model directory into which the pipeline config file will be
      saved.
  """
  if not file_io.file_exists(directory):
    file_io.recursive_create_dir(directory)
  pipeline_config_path = os.path.join(directory, "pipeline.config")
  config_text = text_format.MessageToString(pipeline_config)
  with tf.gfile.Open(pipeline_config_path, "wb") as f:
    tf.logging.info("Writing pipeline config file to %s",
                    pipeline_config_path)
    f.write(config_text)
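
A brief invocation sketch for the function above (not from the original source; it assumes the usual `object_detection.protos` layout for `pipeline_pb2`, and the directory path is illustrative):

from object_detection.protos import pipeline_pb2

# An empty pipeline config; a real one would populate the model, train_config,
# and eval_config fields before saving.
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
save_pipeline_config(pipeline_config, '/tmp/model_dir')
# The config is written to /tmp/model_dir/pipeline.config as a text proto.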
Code example #53
File: task.py Project: javiervicho/pydatalab
def recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.
  Args:
    src_dir: gsc or local path.
    dest_dir: gcs or local path.
  """

  file_io.recursive_create_dir(dest_dir)
  for file_name in file_io.list_directory(src_dir):
    old_path = os.path.join(src_dir, file_name)
    new_path = os.path.join(dest_dir, file_name)

    if file_io.is_directory(old_path):
      recursive_copy(old_path, new_path)
    else:
      file_io.copy(old_path, new_path, overwrite=True)
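
As a small usage sketch (not in the original example; the paths are illustrative, and the GCS destination assumes TensorFlow's file_io was built with GCS support):

# Mirror a local training output folder into a bucket.
recursive_copy('/tmp/training/model', 'gs://my-bucket/exported_model')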
Code example #54
  def testNoKeys(self):
    output_dir = tempfile.mkdtemp()
    try:
      features = {
          'num': {'transform': 'identity'},
          'target': {'transform': 'target'}}
      schema = [
          {'name': 'num', 'type': 'integer'},
          {'name': 'target', 'type': 'float'}]
      data = ['1,2\n', '4,8\n', '5,10\n', '11,22\n']
      file_io.recursive_create_dir(output_dir)
      file_io.write_string_to_file(os.path.join(output_dir, 'schema.json'),
                                   json.dumps(schema, indent=2))
      file_io.write_string_to_file(os.path.join(output_dir, 'features.json'),
                                   json.dumps(features, indent=2))
      file_io.write_string_to_file(os.path.join(output_dir, 'data.csv'),
                                   ''.join(data))

      cmd = ['python %s' % os.path.join(CODE_PATH, 'analyze.py'),
             '--output=' + os.path.join(output_dir, 'analysis'),
             '--csv=' + os.path.join(output_dir, 'data.csv'),
             '--schema=' + os.path.join(output_dir, 'schema.json'),
             '--features=' + os.path.join(output_dir, 'features.json')]
      subprocess.check_call(' '.join(cmd), shell=True)

      cmd = ['cd %s && ' % CODE_PATH,
             'python -m trainer.task',
             '--train=' + os.path.join(output_dir, 'data.csv'),
             '--eval=' + os.path.join(output_dir, 'data.csv'),
             '--job-dir=' + os.path.join(output_dir, 'training'),
             '--analysis=' + os.path.join(output_dir, 'analysis'),
             '--model=linear_regression',
             '--train-batch-size=4',
             '--eval-batch-size=4',
             '--max-steps=2000',
             '--learning-rate=0.1',
             '--transform']

      subprocess.check_call(' '.join(cmd), shell=True)

      result = run_exported_model(
          model_path=os.path.join(output_dir, 'training', 'model'),
          csv_data=['20'])

      self.assertTrue(abs(40 - result['predicted']) < 5)
    finally:
      shutil.rmtree(output_dir)
Code example #55
def _write_assets(assets_directory, assets_filename):
  """Writes asset files to be used with SavedModel for half plus two.

  Args:
    assets_directory: The directory to which the assets should be written.
    assets_filename: Name of the file to which the asset contents should be
        written.

  Returns:
    The path to which the assets file was written.
  """
  if not file_io.file_exists(assets_directory):
    file_io.recursive_create_dir(assets_directory)

  path = os.path.join(
      compat.as_bytes(assets_directory), compat.as_bytes(assets_filename))
  file_io.write_string_to_file(path, "asset-file-contents")
  return path
Code example #56
File: graph_io.py Project: aritratony/tensorflow
def write_graph(graph_or_graph_def, logdir, name, as_text=True):
  """Writes a graph proto to a file.

  The graph is written as a text proto unless `as_text` is `False`.

  ```python
  v = tf.Variable(0, name='my_variable')
  sess = tf.compat.v1.Session()
  tf.io.write_graph(sess.graph_def, '/tmp/my-model', 'train.pbtxt')
  ```

  or

  ```python
  v = tf.Variable(0, name='my_variable')
  sess = tf.compat.v1.Session()
  tf.io.write_graph(sess.graph, '/tmp/my-model', 'train.pbtxt')
  ```

  Args:
    graph_or_graph_def: A `Graph` or a `GraphDef` protocol buffer.
    logdir: Directory where to write the graph. This can refer to remote
      filesystems, such as Google Cloud Storage (GCS).
    name: Filename for the graph.
    as_text: If `True`, writes the graph as an ASCII proto.

  Returns:
    The path of the output proto file.
  """
  if isinstance(graph_or_graph_def, ops.Graph):
    graph_def = graph_or_graph_def.as_graph_def()
  else:
    graph_def = graph_or_graph_def

  # gcs does not have the concept of directory at the moment.
  if not file_io.file_exists(logdir) and not logdir.startswith('gs:'):
    file_io.recursive_create_dir(logdir)
  path = os.path.join(logdir, name)
  if as_text:
    file_io.atomic_write_string_to_file(path,
                                        text_format.MessageToString(
                                            graph_def, float_format=''))
  else:
    file_io.atomic_write_string_to_file(path, graph_def.SerializeToString())
  return path
Code example #57
File: util.py Project: googledatalab/pydatalab
def _recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.
  Args:
    src_dir: gsc or local path.
    dest_dir: gcs or local path.
  When called, dest_dir should exist.
  """
  src_dir = python_portable_string(src_dir)
  dest_dir = python_portable_string(dest_dir)

  file_io.recursive_create_dir(dest_dir)
  for file_name in file_io.list_directory(src_dir):
    old_path = os.path.join(src_dir, file_name)
    new_path = os.path.join(dest_dir, file_name)

    if file_io.is_directory(old_path):
      _recursive_copy(old_path, new_path)
    else:
      file_io.copy(old_path, new_path, overwrite=True)
Code example #58
File: analyze.py Project: javiervicho/pydatalab
def main(argv=None):
  args = parse_arguments(sys.argv if argv is None else argv)

  if args.schema:
    schema = json.loads(
        file_io.read_file_to_string(args.schema).decode())
  else:
    import google.datalab.bigquery as bq
    schema = bq.Table(args.bigquery).schema._bq_schema
  features = json.loads(
      file_io.read_file_to_string(args.features).decode())

  expand_defaults(schema, features)  # features are updated.
  inverted_features = invert_features(features)
  check_schema_transforms_match(schema, inverted_features)

  file_io.recursive_create_dir(args.output)

  if args.cloud:
    run_cloud_analysis(
        output_dir=args.output,
        csv_file_pattern=args.csv,
        bigquery_table=args.bigquery,
        schema=schema,
        inverted_features=inverted_features)
  else:
    run_local_analysis(
        output_dir=args.output,
        csv_file_pattern=args.csv,
        schema=schema,
        inverted_features=inverted_features)

  # Save a copy of the schema and features in the output folder.
  file_io.write_string_to_file(
    os.path.join(args.output, constant.SCHEMA_FILE),
    json.dumps(schema, indent=2))

  file_io.write_string_to_file(
    os.path.join(args.output, constant.FEATURES_FILE),
    json.dumps(features, indent=2))
Code example #59
def local_batch_predict(model_dir, csv_file_pattern, output_dir, output_format, batch_size=100):
  """ Batch Predict with a specified model.

  Runs batch prediction, saves the results to output files, and also creates an
  output schema file. Each output file name is the corresponding input file name
  prefixed with 'predict_results_'.

  Args:
    model_dir: The model directory containing a SavedModel (usually saved_model.pb).
    csv_file_pattern: a pattern of csv files as batch prediction source.
    output_dir: the path of the output directory.
    output_format: csv or json.
    batch_size: Number of rows to predict per batch. A larger batch_size
        improves throughput but uses more memory.
  """

  file_io.recursive_create_dir(output_dir)
  csv_files = file_io.get_matching_files(csv_file_pattern)
  if len(csv_files) == 0:
    raise ValueError('No files found given ' + csv_file_pattern)

  with tf.Graph().as_default(), tf.Session() as sess:
    input_alias_map, output_alias_map = _tf_load_model(sess, model_dir)
    csv_tensor_name = list(input_alias_map.values())[0]
    output_schema = _get_output_schema(sess, output_alias_map)
    for csv_file in csv_files:
      output_file = os.path.join(
          output_dir,
          'predict_results_' +
          os.path.splitext(os.path.basename(csv_file))[0] + '.' + output_format)
      with file_io.FileIO(output_file, 'w') as f:
        prediction_source = _batch_csv_reader(csv_file, batch_size)
        for batch in prediction_source:
          batch = [l.rstrip() for l in batch if l]
          predict_results = sess.run(fetches=output_alias_map, feed_dict={csv_tensor_name: batch})
          formatted_results = _format_results(output_format, output_schema, predict_results)
          f.write('\n'.join(formatted_results) + '\n')

  file_io.write_string_to_file(os.path.join(output_dir, 'predict_results_schema.json'),
                               json.dumps(output_schema, indent=2))
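
To close, a minimal invocation sketch for the function above (not part of the original example; every path, the format, and the batch size are illustrative):

local_batch_predict(
    model_dir='/tmp/training/model',
    csv_file_pattern='/tmp/input/*.csv',
    output_dir='/tmp/predictions',
    output_format='csv',
    batch_size=100)
# Each /tmp/input/foo.csv produces /tmp/predictions/predict_results_foo.csv,
# plus a shared predict_results_schema.json describing the output columns.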