Example #1
0
def create_dir_test():
    """Verifies file_io directory handling methods ."""

    starttime = int(round(time.time() * 1000))
    dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, starttime)
    print("Creating dir %s" % dir_name)
    file_io.create_dir(dir_name)
    elapsed = int(round(time.time() * 1000)) - starttime
    print("Created directory in: %d milliseconds" % elapsed)
    # Check that the directory exists.
    dir_exists = file_io.is_directory(dir_name)
    print("%s directory exists: %s" % (dir_name, dir_exists))

    # List contents of just created directory.
    print("Listing directory %s." % dir_name)
    starttime = int(round(time.time() * 1000))
    print(file_io.list_directory(dir_name))
    elapsed = int(round(time.time() * 1000)) - starttime
    print("Listed directory %s in %s milliseconds" % (dir_name, elapsed))

    # Delete directory.
    print("Deleting directory %s." % dir_name)
    starttime = int(round(time.time() * 1000))
    file_io.delete_recursively(dir_name)
    elapsed = int(round(time.time() * 1000)) - starttime
    print("Deleted directory %s in %s milliseconds" % (dir_name, elapsed))
Example #2
0
 def testListDirectory(self):
   dir_path = os.path.join(self._base_dir, "test_dir")
   file_io.create_dir(dir_path)
   files = ["file1.txt", "file2.txt", "file3.txt"]
   for name in files:
     file_path = os.path.join(dir_path, name)
     file_io.FileIO(file_path, mode="w").write("testing")
   subdir_path = os.path.join(dir_path, "sub_dir")
   file_io.create_dir(subdir_path)
   subdir_file_path = os.path.join(subdir_path, "file4.txt")
   file_io.FileIO(subdir_file_path, mode="w").write("testing")
   dir_list = file_io.list_directory(dir_path)
   self.assertItemsEqual(files + ["sub_dir"], dir_list)
Example #3
0
 def testListDirectory(self):
   dir_path = os.path.join(self._base_dir, "test_dir")
   file_io.create_dir(dir_path)
   files = [b"file1.txt", b"file2.txt", b"file3.txt"]
   for name in files:
     file_path = os.path.join(dir_path, compat.as_str_any(name))
     file_io.write_string_to_file(file_path, "testing")
   subdir_path = os.path.join(dir_path, "sub_dir")
   file_io.create_dir(subdir_path)
   subdir_file_path = os.path.join(subdir_path, "file4.txt")
   file_io.write_string_to_file(subdir_file_path, "testing")
   dir_list = file_io.list_directory(dir_path)
   self.assertItemsEqual(files + [b"sub_dir"], dir_list)
Example #4
0
 def testListDirectory(self, join):
     dir_path = join(self._base_dir, "test_dir")
     file_io.create_dir(dir_path)
     files = ["file1.txt", "file2.txt", "file3.txt"]
     for name in files:
         file_path = join(str(dir_path), name)
         file_io.FileIO(file_path, mode="w").write("testing")
     subdir_path = join(str(dir_path), "sub_dir")
     file_io.create_dir(subdir_path)
     subdir_file_path = join(str(subdir_path), "file4.txt")
     file_io.FileIO(subdir_file_path, mode="w").write("testing")
     dir_list = file_io.list_directory(dir_path)
     self.assertItemsEqual(files + ["sub_dir"], dir_list)
Example #5
0
 def _get_tfrecord_filenames(dir_path):
     assert isinstance(dir_path,
                       str), "dir_path is not a String: %r" % dir_path
     assert file_io.file_exists(
         dir_path), "directory `%s` does not exist" % dir_path
     assert file_io.is_directory(
         dir_path), "`%s` is not a directory" % dir_path
     flist = file_io.list_directory(dir_path)
     input_files = [
         pjoin(dir_path, x)
         for x in filter(lambda f: not f.startswith("_"), flist)
     ]
     filenames = tf.placeholder_with_default(input_files, shape=[None])
     return filenames
Example #6
0
def get_record_id_map(brats_tfrecords_dir):
    """
    Get the mapping from patient_id --> TFRecord file

    :param brats_tfrecords_dir: directory containing all TFRecords
    :return: Dictionary mapping patient_id to TFRecord file
    """
    tfrecord_filenames = file_io.list_directory(brats_tfrecords_dir)
    id_record_map = {}
    for file_name in tfrecord_filenames:
        patient_id = get_id_of_TFRecord(file_name)
        id_record_map[patient_id] = os.path.join(brats_tfrecords_dir,
                                                 file_name)
    return id_record_map
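
The helper get_id_of_TFRecord is not shown above. A hedged sketch of what it might look like, assuming (purely for illustration) that each record file is named "<patient_id>.tfrecord":

import os

def get_id_of_TFRecord(file_name):
    # Hypothetical helper: assumes files are named "<patient_id>.tfrecord".
    # The naming convention actually used by the example above is not shown.
    return os.path.splitext(os.path.basename(file_name))[0]
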
Example #7
0
    def testWriteTransformFnIsRetryable(self):
        tft.test_case.skip_if_external_environment(
            'Retries are currently not available on this environment.')
        original_copy_tree_to_unique_temp_dir = (
            transform_fn_io._copy_tree_to_unique_temp_dir)

        def mock_copy_tree_to_unique_temp_dir(source, base_temp_dir_path):
            """Mocks transform_fn_io._copy_tree to fail the first time it is called by this test, thus forcing a retry which should succeed."""
            global _COPY_TREE_TO_UNIQUE_TEMP_DIR_CALLED
            if not _COPY_TREE_TO_UNIQUE_TEMP_DIR_CALLED:
                _COPY_TREE_TO_UNIQUE_TEMP_DIR_CALLED = True
                original_copy_tree_to_unique_temp_dir(source,
                                                      base_temp_dir_path)
                raise ArithmeticError('Some error')
            return original_copy_tree_to_unique_temp_dir(
                source, base_temp_dir_path)

        with self._makeTestPipeline() as pipeline:
            transform_output_dir = os.path.join(self.get_temp_dir(), 'output')
            # Create an empty directory for the source saved model dir.
            saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
            file_io.recursive_create_dir(saved_model_dir)
            saved_model_path = os.path.join(saved_model_dir, 'saved_model')
            with file_io.FileIO(saved_model_path, mode='w') as f:
                f.write('some content')
            saved_model_dir_pcoll = (
                pipeline
                | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))
            # Combine test metadata with a dict of PCollections resolving futures.
            deferred_metadata = pipeline | 'CreateDeferredMetadata' >> beam.Create(
                [test_metadata.COMPLETE_METADATA])
            metadata = beam_metadata_io.BeamDatasetMetadata(
                test_metadata.INCOMPLETE_METADATA, deferred_metadata)
            with mock.patch.object(transform_fn_io,
                                   '_copy_tree_to_unique_temp_dir',
                                   mock_copy_tree_to_unique_temp_dir):
                _ = ((saved_model_dir_pcoll, metadata)
                     | transform_fn_io.WriteTransformFn(transform_output_dir))

        # Test reading with TFTransformOutput
        tf_transform_output = tft.TFTransformOutput(transform_output_dir)
        metadata = tf_transform_output.transformed_metadata
        self.assertEqual(metadata, test_metadata.COMPLETE_METADATA)

        transform_fn_dir = tf_transform_output.transform_savedmodel_dir
        self.assertTrue(file_io.file_exists(transform_fn_dir))
        self.assertTrue(file_io.is_directory(transform_fn_dir))
        # Check temp directory created by failed run was cleaned up.
        self.assertEqual(2, len(file_io.list_directory(transform_output_dir)))
Example #8
0
        def proc_tensorboard_works_with_same_file_path(test_obj,
                                                       saving_filepath):
            model, _, train_ds, steps = _model_setup(test_obj, file_format='')
            num_epoch = 2

            # The saving_filepath shouldn't exist at the beginning (as it's unique).
            test_obj.assertFalse(file_io.file_exists(saving_filepath))

            model.fit(
                x=train_ds,
                epochs=num_epoch,
                steps_per_epoch=steps,
                callbacks=[callbacks.TensorBoard(log_dir=saving_filepath)])

            test_obj.assertTrue(file_io.list_directory(saving_filepath))
Example #9
0
def recursive_copy(src_dir, dest_dir):
    """Copy the contents of src_dir into the folder dest_dir.
  Args:
    src_dir: gcs or local path.
    dest_dir: gcs or local path.
  """

    file_io.recursive_create_dir(dest_dir)
    for file_name in file_io.list_directory(src_dir):
        old_path = os.path.join(src_dir, file_name)
        new_path = os.path.join(dest_dir, file_name)

        if file_io.is_directory(old_path):
            recursive_copy(old_path, new_path)
        else:
            file_io.copy(old_path, new_path, overwrite=True)
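
Because file_io resolves both local paths and GCS URLs through TensorFlow's filesystem layer, the same call covers either direction. A hedged usage sketch with placeholder paths:

# Placeholder paths; any mix of local and gs:// locations should work.
recursive_copy("/tmp/exported_model", "gs://my-bucket/models/exported_model")
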
Example #10
0
def recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.
  Args:
    src_dir: gcs or local path.
    dest_dir: gcs or local path.
  """

  file_io.recursive_create_dir(dest_dir)
  for file_name in file_io.list_directory(src_dir):
    old_path = os.path.join(src_dir, file_name)
    new_path = os.path.join(dest_dir, file_name)

    if file_io.is_directory(old_path):
      recursive_copy(old_path, new_path)
    else:
      file_io.copy(old_path, new_path, overwrite=True)
Example #11
0
def main():
  tf.logging.set_verbosity(tf.logging.INFO)
  args = parse_arguments()
  args.slice_columns = [
    column
    for column_group in args.slice_columns
    for column in column_group.split(',')
  ]
  schema = json.loads(file_io.read_file_to_string(args.schema))
  eval_model_parent_dir = os.path.join(args.model, 'tfma_eval_model_dir')
  model_export_dir = os.path.join(eval_model_parent_dir, file_io.list_directory(eval_model_parent_dir)[0])
  run_analysis(args.output, model_export_dir, args.eval, schema,
               args.project, args.mode, args.slice_columns)
  generate_static_html_output(args.output, args.slice_columns)
  with open('/output.txt', 'w') as f:
    f.write(args.output)
Example #12
0
def list_odps_table_oss_files(table_name):
    table_data_meta = os.path.join(FLAGS.buckets, table_name, '.odps/.meta')
    print(table_data_meta)

    meta_str = file_io.read_file_to_string(table_data_meta)
    print('meta_str:', meta_str)

    meta_dict = json.loads(meta_str)
    table_data_dirs = meta_dict['dirs']
    table_data_dirs = [os.path.join(FLAGS.buckets, table_name, '.odps', d) for d in table_data_dirs]

    all_files = []
    for data_dir in table_data_dirs:
        print("data_dir:", data_dir)
        tmp_files = file_io.list_directory(data_dir)
        for fname in tmp_files:
            all_files.append(os.path.join(data_dir, fname))
    return all_files
Example #13
0
def _recursive_copy(src_dir, dest_dir):
    """Copy the contents of src_dir into the folder dest_dir.
  Args:
    src_dir: gcs or local path.
    dest_dir: gcs or local path.
  When called, dest_dir should exist.
  """
    src_dir = python_portable_string(src_dir)
    dest_dir = python_portable_string(dest_dir)

    file_io.recursive_create_dir(dest_dir)
    for file_name in file_io.list_directory(src_dir):
        old_path = os.path.join(src_dir, file_name)
        new_path = os.path.join(dest_dir, file_name)

        if file_io.is_directory(old_path):
            _recursive_copy(old_path, new_path)
        else:
            file_io.copy(old_path, new_path, overwrite=True)
Example #14
0
    def load_paths_only(path):
        print("Parsing data...")
        gui_paths = []
        img_paths = []
        for f in file_io.list_directory(path):
            if f.find(".gui") != -1:
                path_gui = "{}/{}".format(path, f)
                gui_paths.append(path_gui)
                file_name = f[:f.find(".gui")]

                if file_io.file_exists("{}/{}.png".format(path, file_name)):
                    path_img = "{}/{}.png".format(path, file_name)
                    img_paths.append(path_img)
                elif file_io.file_exists("{}/{}.npz".format(path, file_name)):
                    path_img = "{}/{}.npz".format(path, file_name)
                    img_paths.append(path_img)

        assert len(gui_paths) == len(img_paths)
        return gui_paths, img_paths
Example #15
0
def _recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.
  Args:
    src_dir: gcs or local path.
    dest_dir: gcs or local path.
  When called, dest_dir should exist.
  """
  src_dir = python_portable_string(src_dir)
  dest_dir = python_portable_string(dest_dir)

  file_io.recursive_create_dir(dest_dir)
  for file_name in file_io.list_directory(src_dir):
    old_path = os.path.join(src_dir, file_name)
    new_path = os.path.join(dest_dir, file_name)

    if file_io.is_directory(old_path):
      _recursive_copy(old_path, new_path)
    else:
      file_io.copy(old_path, new_path, overwrite=True)
Example #16
0
    def generator(subdir, batch_size):
        desired_size = 224
        file_names = [(desired_size, subdir, fn) for fn in file_io.list_directory(subdir)]
        np.random.shuffle(file_names)
        i = 0
        batch = np.zeros((batch_size, desired_size, desired_size, 3))
        labels = np.zeros((batch_size, LABELS))
        p = Pool()

        while True:
            for im, label in p.imap_unordered(read_image, file_names):
                if im == -1:
                    continue
                batch[i], labels[i] = im, label
                if i == batch_size-1:
                    yield batch, labels
                    i = 0
                else:
                    i += 1
Example #17
0
def create_dir_test():
    """Verifies file_io directory handling methods ."""

    starttime = int(round(time.time() * 1000))
    dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, starttime)
    print("Creating dir %s" % dir_name)
    file_io.create_dir(dir_name)
    elapsed = int(round(time.time() * 1000)) - starttime
    print("Created directory in: %d milliseconds" % elapsed)
    # Check that the directory exists.
    dir_exists = file_io.is_directory(dir_name)
    print("%s directory exists: %s" % (dir_name, dir_exists))

    # List contents of just created directory.
    starttime = int(round(time.time() * 1000))
    print("Listing directory %s." % dir_name)
    print(file_io.list_directory(dir_name))
    elapsed = int(round(time.time() * 1000)) - starttime
    print("Listed directory %s in %s milliseconds" % (dir_name, elapsed))
Example #18
0
    def load(self) -> T5ForConditionalGeneration:
        try:
            if not self.flush_cache:
                return self._fix_t5_model(
                    T5ForConditionalGeneration.from_pretrained(
                        str(self.model_cache_dir),
                        from_tf=True,
                        force_download=False))
        except (RuntimeError, OSError):
            logging.info('T5 model weights not in cache.')
        m = re.search(r'model_checkpoint_path: "(.+?)"', self.ckpt_prefix)
        assert m is not None, 'checkpoint file malformed'

        # Copy over checkpoint data
        ckpt_patt = re.compile(
            rf'^{m.group(1)}\.(data-\d+-of-\d+|index|meta)$')
        for name in file_io.list_directory(self.url):
            if not ckpt_patt.match(name):
                continue
            url = os.path.join(self.url, name)
            url_stat = file_io.stat(url)
            cache_file_path = self.model_cache_dir / ckpt_patt.sub(
                rf'{TRANSFO_PREFIX}.\1', name)
            try:
                cs = os.stat(str(cache_file_path))
                if cs.st_size == url_stat.length and cs.st_mtime_ns > url_stat.mtime_nsec and not self.flush_cache:
                    logging.info(f'Skipping {name}...')
                    continue
            except FileNotFoundError:
                pass
            logging.info(f'Caching {name}...')
            file_io.copy(url, str(cache_file_path), overwrite=True)

        # Transformers expects a model config.json
        config = T5Config.from_pretrained(self.model_type)
        with open(str(self.model_cache_dir / 'config.json'), 'w') as f:
            json.dump(config.__dict__, f, indent=4)
        return self._fix_t5_model(
            T5ForConditionalGeneration.from_pretrained(str(
                self.model_cache_dir),
                                                       from_tf=True,
                                                       force_download=False))
Example #19
0
def dataset(data_path, batch_size=1, shuffle=False, repeat=False):
    names = file_io.list_directory(data_path)
    _paths = []
    for name in names:
        _paths.append(os.path.join(data_path, name))
    ds = tf.data.TFRecordDataset(_paths)
    ds = ds.map(__parse_proto)
    ds = ds.map(
        lambda article, abstract: tuple(tf.py_func(
            __preprocess_article_and_abstract,
            [article, abstract],
            [tf.string, tf.string],
            name='preprocess_article_and_abstract'
        )))
    if shuffle:
        ds = ds.shuffle(buffer_size=100)
    ds = ds.batch(batch_size, drop_remainder=True)
    if repeat:
        ds = ds.repeat()
    return ds
Example #20
0
    def load(self, path, generate_binary_sequences=False):
        print("Loading data...")
        for f in file_io.list_directory(path):
            if f.find(".gui") != -1:
                gui = file_io.FileIO("{}/{}".format(path, f), 'r')
                file_name = f[:f.find(".gui")]

                if file_io.file_exists("{}/{}.png".format(path, file_name)):
                    img = Utils.get_preprocessed_img(
                        "{}/{}.png".format(path, file_name), IMAGE_SIZE)
                    self.append(file_name, gui, img)
                elif file_io.file_exists("{}/{}.npz".format(path, file_name)):
                    f_str = StringIO(
                        file_io.read_file_to_string("{}/{}.npz".format(
                            path, file_name)))
                    img = np.load(f_str)["features"]
                    self.append(file_name, gui, img)

        print("Generating sparse vectors...")
        self.voc.create_binary_representation()
        self.next_words = self.sparsify_labels(self.next_words, self.voc)
        if generate_binary_sequences:
            self.partial_sequences = self.binarize(self.partial_sequences,
                                                   self.voc)
        else:
            self.partial_sequences = self.indexify(self.partial_sequences,
                                                   self.voc)

        self.size = len(self.ids)
        assert self.size == len(self.input_images) == len(
            self.partial_sequences) == len(self.next_words)
        assert self.voc.size == len(self.voc.vocabulary)

        print("Dataset size: {}".format(self.size))
        print("Vocabulary size: {}".format(self.voc.size))

        self.input_shape = self.input_images[0].shape
        self.output_size = self.voc.size

        print("Input shape: {}".format(self.input_shape))
        print("Output size: {}".format(self.output_size))
Example #21
0
  def __init__(self, export_dir):
    self._saved_model = saved_model_pb2.SavedModel()
    self._saved_model.saved_model_schema_version = (
        constants.SAVED_MODEL_SCHEMA_VERSION)

    self._export_dir = export_dir
    if file_io.file_exists(export_dir):
      if file_io.list_directory(export_dir):
        raise AssertionError(
            "Export directory already exists, and isn't empty. Please choose "
            "a different export directory, or delete all the contents of the "
            "specified directory: %s" % export_dir)
    else:
      file_io.recursive_create_dir(self._export_dir)

    # Boolean to track whether variables and assets corresponding to the
    # SavedModel have been saved. Specifically, the first meta graph to be added
    # MUST use the add_meta_graph_and_variables() API. Subsequent add operations
    # on the SavedModel MUST use the add_meta_graph() API which does not save
    # weights.
    self._has_saved_variables = False
Example #22
0
def leer_imagenes_etiquetas(ruta):
    ## variables
    x = []
    y = []

    ruta = ruta + "/"
    print('LA RUTA ES...' + ruta)
    ## get the list of files for the path passed as a parameter
    filelist = sorted(file_io.list_directory(ruta))
    random.seed(42)
    random.shuffle(filelist)

    print('TAMAÑO DIRECTORIO...' + str(len(filelist)) + "  primero..." +
          filelist[1])
    ## get the image and label for each element of the list
    for imgPath in filelist:
        imagen = leer_una_imagen(ruta + imgPath)
        imagen = img_to_array(imagen)  # convert to array
        x.append(imagen)
        # read, resize and append to the array

        #imagePath = ruta + imgPath
        #imagen = cv2.imread(imagePath) # read
        #imagen = cv2.resize(imagen, (img_width, img_height))  # resize

        # extract the label from each file name
        #ruta_partes = imagePath.split("/")  # split it into the parts of the path
        #nombreimagen = ruta_partes[len(ruta_partes)-1]   # keep the file name, which is the last part

        label = imgPath[imgPath.find("_") +
                        1:imgPath.find(".")]  # la etiqueta está entre _ y .
        label = int(
            label
        ) - 1  # labels start at 1, so subtract 1 to make them start at 0
        y.append(label)

    print('REGISTROS LEIDOS X...' + str(len(x)))
    print('REGISTROS LEIDOS Y...' + str(len(y)))
    return (x, y)
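
The helper leer_una_imagen is not shown above. A hedged sketch of one way it could be written, assuming it reads raw bytes through file_io (so gs:// paths also work) and decodes with PIL; the target size is an assumption:

import io
from PIL import Image
from tensorflow.python.lib.io import file_io

def leer_una_imagen(img_path, size=(224, 224)):
    # Read bytes through file_io so both local and gs:// paths work,
    # then decode and resize with PIL. Returns None if reading fails,
    # matching the "if not imagen" check used in Example #24 below.
    try:
        data = file_io.read_file_to_string(img_path, binary_mode=True)
        return Image.open(io.BytesIO(data)).resize(size)
    except Exception:
        return None
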
Example #23
0
def load_library(library_location):
    """Loads a TensorFlow plugin.

  "library_location" can be a path to a specific shared object, or a folder.
  If it is a folder, all shared objects that are named "libtfkernel*" will be
  loaded. When the library is loaded, kernels registered in the library via the
  `REGISTER_*` macros are made available in the TensorFlow process.

  Args:
    library_location: Path to the plugin or the folder of plugins.
      Relative or absolute filesystem path to a dynamic library file or folder.

  Returns:
    None

  Raises:
    OSError: When the file to be loaded is not found.
    RuntimeError: when unable to load the library.
  """
    if file_io.file_exists(library_location):
        if file_io.is_directory(library_location):
            directory_contents = file_io.list_directory(library_location)

            kernel_libraries = [
                os.path.join(library_location, f) for f in directory_contents
                if _is_shared_object(f)
            ]
        else:
            kernel_libraries = [library_location]

        for lib in kernel_libraries:
            py_tf.TF_LoadLibrary(lib)

    else:
        raise OSError(
            errno.ENOENT,
            'The file or folder to load kernel libraries from does not exist.',
            library_location)
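
The predicate _is_shared_object is not shown here. An illustrative stand-in (not TensorFlow's actual implementation) that matches the docstring's description of "libtfkernel*" shared objects:

def _is_shared_object(filename):
    # Illustrative guess only; TensorFlow's real check may differ.
    return filename.startswith("libtfkernel") and filename.endswith(
        (".so", ".dylib", ".dll"))
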
Example #24
0
def leer_imagenes_etiquetas(ruta):
    ## variables
    x = []
    y = []

    ## get the list of files for the path passed as a parameter
    ruta = ruta + "/"
    print('LA RUTA ES...' + ruta)
    filelist = sorted(file_io.list_directory(ruta))
    random.seed(42)
    random.shuffle(filelist)

    ## get the image and label for each element of the list
    print("EMPIEZA A LEER  ", time.time())

    with tqdm(total=len(filelist)) as t:
        for imgPath in filelist:
            imagen = leer_una_imagen(ruta + imgPath)
            if not imagen:
                continue  # if reading failed and the image is empty, skip it
            try:
                imagen = img_to_array(
                    imagen)  # convert to array; if it fails, skip the image
            except:
                continue
            x.append(imagen)

            label = imgPath[imgPath.find("_") + 1:imgPath.find(
                ".")]  # la etiqueta está entre _ y .
            label = int(
                label
            ) - 1  # labels start at 1, so subtract 1 to make them start at 0
            y.append(label)
            t.update(1)
    print('REGISTROS LEIDOS X...' + str(len(x)))
    print('REGISTROS LEIDOS Y...' + str(len(y)))
    return (x, y)
Example #25
0
def load_library(library_location):
  """Loads a TensorFlow plugin.

  "library_location" can be a path to a specific shared object, or a folder.
  If it is a folder, all shared objects that are named "libtfkernel*" will be
  loaded. When the library is loaded, kernels registered in the library via the
  `REGISTER_*` macros are made available in the TensorFlow process.

  Args:
    library_location: Path to the plugin or the folder of plugins.
      Relative or absolute filesystem path to a dynamic library file or folder.

  Returns:
    None

  Raises:
    OSError: When the file to be loaded is not found.
    RuntimeError: when unable to load the library.
  """
  if file_io.file_exists(library_location):
    if file_io.is_directory(library_location):
      directory_contents = file_io.list_directory(library_location)

      kernel_libraries = [
          os.path.join(library_location, f) for f in directory_contents
          if _is_shared_object(f)]
    else:
      kernel_libraries = [library_location]

    for lib in kernel_libraries:
      py_tf.TF_LoadLibrary(lib)

  else:
    raise OSError(
        errno.ENOENT,
        'The file or folder to load kernel libraries from does not exist.',
        library_location)
Example #26
0
def main():
    logging.getLogger().setLevel(logging.INFO)
    args = parse_arguments()
    # Models trained with Estimator are exported to a base/export/export/123456781 directory.
    # Our trainer exports only one model.
    export_parent_dir = os.path.join(args.model, 'export', 'export')
    model_export_dir = os.path.join(export_parent_dir, file_io.list_directory(export_parent_dir)[0])
    schema = json.loads(file_io.read_file_to_string(args.schema))
    run_predict(args.output, args.data, schema, args.target, model_export_dir,
              args.project, args.mode, args.batchsize)
    prediction_results = os.path.join(args.output, 'prediction_results-*')
    with open('/output.txt', 'w') as f:
        f.write(prediction_results)

    tmp = os.path.join(args.output, 'prediction_results-00000-of-00001')
    with open('/prediction-output.txt', 'w') as f:
        f.write(open(tmp,"r").read())

    with file_io.FileIO(os.path.join(args.output, 'schema.json'), 'r') as f:
        schema = json.load(f)

    BUCKET_PATH = "/".join(args.output.split('/')[3:])

    upload_to_minio('/prediction-output.txt', BUCKET_PATH)

    metadata = {
    'outputs' : [{
      'type': 'table',
      'storage': 'minio',
      'format': 'csv',
      'header': [x['name'] for x in schema],
      'source': "{}://{}/{}/{}".format("minio",BUCKET_NAME,BUCKET_PATH,"prediction-output.txt")
    }]
    }
    with open('/mlpipeline-ui-metadata.json', 'w') as f:
        json.dump(metadata, f)
Example #27
0
 def testListDirectoryFailure(self):
   dir_path = os.path.join(self._base_dir, "test_dir")
   with self.assertRaises(errors.NotFoundError):
     file_io.list_directory(dir_path)
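
As the test above shows, list_directory raises errors.NotFoundError when the directory does not exist. Callers that can tolerate a missing path typically guard the call; a minimal sketch:

from tensorflow.python.framework import errors
from tensorflow.python.lib.io import file_io

def list_directory_or_empty(dir_path):
    # Return [] instead of raising when dir_path does not exist.
    try:
        return file_io.list_directory(dir_path)
    except errors.NotFoundError:
        return []
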
Example #28
0
def load_session_bundle_from_path(export_dir, target="", config=None):
    """Load session bundle from the given path.

  The function reads the export from export_dir, adds the graph data to the
  default graph, and restores the parameters for the session it creates.

  Args:
    export_dir: the directory that contains files exported by exporter.
    target: The execution engine to connect to. See target in tf.Session()
    config: A ConfigProto proto with configuration options. See config in
    tf.Session()

  Returns:
    session: a tensorflow session created from the variable files.
    meta_graph: a meta graph proto saved in the exporter directory.

  Raises:
    RuntimeError: if the required files are missing or contain unrecognizable
    fields, i.e. the exported model is invalid.
  """
    if hasattr(tf, "GIT_VERSION"):
        logging.info("tf.GIT_VERSION=%s", tf.GIT_VERSION)
    else:
        logging.info("tf.GIT_VERSION=unknown")

    meta_graph_filename = os.path.join(export_dir, META_GRAPH_DEF_FILENAME)
    if not file_io.file_exists(meta_graph_filename):
        raise RuntimeError("Expected meta graph file missing %s" %
                           meta_graph_filename)

    variables_filename = ""
    variables_filename_list = []
    additional_files_to_copy = []
    checkpoint_sharded = False

    variables_index_filename = os.path.join(export_dir,
                                            VARIABLES_INDEX_FILENAME_V2)
    checkpoint_v2 = file_io.file_exists(variables_index_filename)

    if checkpoint_v2:
        # The checkpoint is in v2 format.
        variables_filename = os.path.join(export_dir, VARIABLES_FILENAME_V2)
        # Check to see if the file "export" exists or not.
        if file_io.file_exists(variables_filename):
            variables_filename_list = [variables_filename]
        else:
            # Check to see if the sharded file named "export-?????-of-?????" exists.
            variables_filename_list = fnmatch.filter(
                file_io.list_directory(export_dir),
                VARIABLES_FILENAME_PATTERN_V2)
            checkpoint_sharded = True
        # If the checkpoint is not local, we need to copy export.index locally too.
        additional_files_to_copy = [variables_index_filename]
    else:
        variables_filename = os.path.join(export_dir, VARIABLES_FILENAME)
        if file_io.file_exists(variables_filename):
            variables_filename_list = [variables_filename]
        else:
            variables_filename_list = fnmatch.filter(
                file_io.list_directory(export_dir), VARIABLES_FILENAME_PATTERN)
            checkpoint_sharded = True

    if not variables_filename_list or not variables_filename:
        raise RuntimeError("No or bad checkpoint files found in %s" %
                           export_dir)

    # Prepare the files to restore a session.
    restore_files = ""
    if checkpoint_v2 or not checkpoint_sharded:
        # For checkpoint v2 or v1 with non-sharded files, use "export" to restore
        # the session.
        restore_files = VARIABLES_FILENAME
    else:
        restore_files = VARIABLES_FILENAME_PATTERN

    # Reads meta graph file.
    meta_graph_def = meta_graph_pb2.MetaGraphDef()
    with file_io.FileIO(meta_graph_filename, "r") as f:
        logging.info("Reading metagraph from %s", meta_graph_filename)
        meta_graph_def.ParseFromString(f.read())

    collection_def = meta_graph_def.collection_def
    graph_def = tf.GraphDef()
    if GRAPH_KEY in collection_def:
        logging.info("Using value of collection %s for the graph.", GRAPH_KEY)
        # Use serving graph_def in MetaGraphDef collection_def if exists
        graph_def_any = collection_def[GRAPH_KEY].any_list.value
        if len(graph_def_any) != 1:
            raise RuntimeError(
                "Expected exactly one serving GraphDef in : %s" %
                meta_graph_def)
        else:
            graph_def_any[0].Unpack(graph_def)
            # Replace the graph def in meta graph proto.
            meta_graph_def.graph_def.CopyFrom(graph_def)

            # TODO(b/36055868): If we don't clear the collections then
            # import_meta_graph fails.
            #
            # We can't delete all the collections because some of them are used
            # by prediction to get the names of the input/output tensors.
            keys_to_delete = (set(meta_graph_def.collection_def.keys()) -
                              set(keys_used_for_serving()))
            for k in keys_to_delete:
                del meta_graph_def.collection_def[k]
    else:
        logging.info(
            "No %s found in metagraph. Using metagraph as serving graph",
            GRAPH_KEY)

    tf.reset_default_graph()
    sess = tf.Session(target, graph=None, config=config)
    # Import the graph.
    saver = tf.train.import_meta_graph(meta_graph_def)
    # Restore the session.
    if variables_filename_list[0].startswith("gs://"):
        # Make copy from GCS files.
        # TODO(b/36052034): Retire this once tensorflow can access GCS.
        try:
            temp_dir_path = tempfile.mkdtemp("local_variable_files")
            for f in variables_filename_list + additional_files_to_copy:
                file_io.copy(f, os.path.join(temp_dir_path,
                                             os.path.basename(f)))

            saver.restore(sess, os.path.join(temp_dir_path, restore_files))
        finally:
            try:
                shutil.rmtree(temp_dir_path)
            except OSError as e:
                if e.message == "Cannot call rmtree on a symbolic link":
                    # Interesting synthetic exception made up by shutil.rmtree.
                    # Means we received a symlink from mkdtemp.
                    # Also means must clean up the symlink instead.
                    os.unlink(temp_dir_path)
                else:
                    raise
    else:
        saver.restore(sess, os.path.join(export_dir, restore_files))

    init_op_tensor = None
    if INIT_OP_KEY in collection_def:
        init_ops = collection_def[INIT_OP_KEY].node_list.value
        if len(init_ops) != 1:
            raise RuntimeError("Expected exactly one serving init op in : %s" %
                               meta_graph_def)
        init_op_tensor = tf.get_collection(INIT_OP_KEY)[0]

    if init_op_tensor:
        # Run the init op.
        sess.run(fetches=[init_op_tensor])

    return sess, meta_graph_def
Example #29
0
 def testListDirectoryFailure(self):
   dir_path = os.path.join(self._base_dir, "test_dir")
   with self.assertRaises(errors.NotFoundError):
     file_io.list_directory(dir_path)
Example #30
0
def create_dir_test():
  """Verifies file_io directory handling methods."""

  # Test directory creation.
  starttime_ms = int(round(time.time() * 1000))
  dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, starttime_ms)
  print("Creating dir %s" % dir_name)
  file_io.create_dir(dir_name)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  print("Created directory in: %d milliseconds" % elapsed_ms)

  # Check that the directory exists.
  dir_exists = file_io.is_directory(dir_name)
  assert dir_exists
  print("%s directory exists: %s" % (dir_name, dir_exists))

  # Test recursive directory creation.
  starttime_ms = int(round(time.time() * 1000))
  recursive_dir_name = "%s/%s/%s" % (dir_name,
                                     "nested_dir1",
                                     "nested_dir2")
  print("Creating recursive dir %s" % recursive_dir_name)
  file_io.recursive_create_dir(recursive_dir_name)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  print("Created directory recursively in: %d milliseconds" % elapsed_ms)

  # Check that the directory exists.
  recursive_dir_exists = file_io.is_directory(recursive_dir_name)
  assert recursive_dir_exists
  print("%s directory exists: %s" % (recursive_dir_name, recursive_dir_exists))

  # Create some contents in the just created directory and list the contents.
  num_files = 10
  files_to_create = ["file_%d.txt" % n for n in range(num_files)]
  for file_num in files_to_create:
    file_name = "%s/%s" % (dir_name, file_num)
    print("Creating file %s." % file_name)
    file_io.write_string_to_file(file_name, "test file.")

  print("Listing directory %s." % dir_name)
  starttime_ms = int(round(time.time() * 1000))
  directory_contents = file_io.list_directory(dir_name)
  print(directory_contents)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  print("Listed directory %s in %s milliseconds" % (dir_name, elapsed_ms))
  assert set(directory_contents) == set(files_to_create + ["nested_dir1/"])

  # Test directory renaming.
  dir_to_rename = "%s/old_dir" % dir_name
  new_dir_name = "%s/new_dir" % dir_name
  file_io.create_dir(dir_to_rename)
  assert file_io.is_directory(dir_to_rename)
  assert not file_io.is_directory(new_dir_name)

  starttime_ms = int(round(time.time() * 1000))
  print("Will try renaming directory %s to %s" % (dir_to_rename, new_dir_name))
  file_io.rename(dir_to_rename, new_dir_name)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  print("Renamed directory %s to %s in %s milliseconds" % (
      dir_to_rename, new_dir_name, elapsed_ms))
  assert not file_io.is_directory(dir_to_rename)
  assert file_io.is_directory(new_dir_name)

  # Test Delete directory recursively.
  print("Deleting directory recursively %s." % dir_name)
  starttime_ms = int(round(time.time() * 1000))
  file_io.delete_recursively(dir_name)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  dir_exists = file_io.is_directory(dir_name)
  assert not dir_exists
  print("Deleted directory recursively %s in %s milliseconds" % (
      dir_name, elapsed_ms))
Example #31
0
def run_tfma(slice_spec,
             eval_model_base_dir,
             tfma_run_dir,
             input_csv,
             working_dir,
             mode,
             project,
             setup_file,
             add_metrics_callbacks=None):
    """Does model analysis, using the given spec of how to 'slice', and returns an
    EvalResult that can be used with TFMA visualization functions.
    """

    print("eval model base dir: %s" % eval_model_base_dir)
    # Make sure the model dir exists before proceeding, as sometimes it takes a few seconds to become
    # available after training completes.
    retries = 0
    sleeptime = 5
    while retries < 20:
        try:
            eval_model_dir = os.path.join(
                eval_model_base_dir,
                file_io.list_directory(eval_model_base_dir)[0])
            print("eval model dir: %s" % eval_model_dir)
            if 'temp' not in eval_model_dir:
                break
            else:
                print("Sleeping %s seconds to sync with GCS..." % sleeptime)
                time.sleep(sleeptime)
                retries += 1
                sleeptime *= 2
        except Exception as e:
            print(e)
            print("Sleeping %s seconds to sync with GCS..." % sleeptime)
            time.sleep(sleeptime)
            retries += 1
            sleeptime *= 2

    schema = taxi.read_schema('schema.pbtxt')

    temp_dir = os.path.join(working_dir, 'tmp')

    if mode == 'local':
        print("mode == local")
        options = {'project': project}
        pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)
        runner = 'DirectRunner'
    elif mode == 'cloud':
        print("mode == cloud")
        options = {
            'job_name': 'tfma-' + str(uuid.uuid4()),
            'temp_location': temp_dir,
            'project': project,
            'save_main_session': True,
            'setup_file': setup_file
        }
        pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)
        runner = 'DataFlowRunner'
    else:
        raise ValueError("Invalid mode %s." % mode)

    display_only_data_location = input_csv

    with beam.Pipeline(runner, options=pipeline_options) as pipeline:
        with beam_impl.Context(temp_dir=temp_dir):
            csv_coder = taxi.make_csv_coder(schema)
            raw_data = (
                pipeline
                | 'ReadFromText' >> beam.io.ReadFromText(
                    input_csv,
                    # coder=beam.coders.BytesCoder(),
                    skip_header_lines=1)
                | 'ParseCSV' >> beam.Map(csv_coder.decode))

            # Examples must be in clean tf-example format.
            coder = taxi.make_proto_coder(schema)

            raw_data = (
                raw_data
                # | 'CleanData' >> beam.Map(taxi.clean_raw_data_dict)
                | 'ToSerializedTFExample' >> beam.Map(coder.encode))

            _ = raw_data | 'EvaluateAndWriteResults' >> tfma.EvaluateAndWriteResults(
                eval_saved_model_path=eval_model_dir,
                slice_spec=slice_spec,
                output_path=tfma_run_dir,
                add_metrics_callbacks=add_metrics_callbacks,
                display_only_data_location=input_csv)

    return tfma.load_eval_result(output_path=tfma_run_dir)
Example #32
0
def main():
  parser = argparse.ArgumentParser(description='ML Trainer')
  parser.add_argument(
      '--model_name',
      help='...',
      required=True)

  parser.add_argument(
      '--model_path',
      help='...',
      required=True)

  parser.add_argument('--cluster', type=str,
                      help='GKE cluster set up for kubeflow. If set, zone must be provided. ' +
                           'If not set, assuming this runs in a GKE container and current ' +
                           'cluster is used.')
  parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.')
  args = parser.parse_args()

  KUBEFLOW_NAMESPACE = 'kubeflow'

  # Make sure model dir exists before proceeding
  retries = 0
  sleeptime = 5
  while retries < 20:
    try:
      model_dir = os.path.join(args.model_path, file_io.list_directory(args.model_path)[-1])
      print("model subdir: %s" % model_dir)
      break
    except Exception as e:  #pylint: disable=broad-except
      print(e)
      print("Sleeping %s seconds to sync with GCS..." % sleeptime)
      time.sleep(sleeptime)
      retries += 1
      sleeptime *= 2
  if retries >= 20:
    print("could not get model subdir from %s, exiting" % args.model_path)
    exit(1)

  logging.getLogger().setLevel(logging.INFO)
  args_dict = vars(args)
  if args.cluster and args.zone:
    cluster = args_dict.pop('cluster')  #pylint: disable=unused-variable
    zone = args_dict.pop('zone')  #pylint: disable=unused-variable
  else:
    # Get cluster name and zone from metadata
    metadata_server = "http://metadata/computeMetadata/v1/instance/"
    metadata_flavor = {'Metadata-Flavor' : 'Google'}
    cluster = requests.get(metadata_server + "attributes/cluster-name",
                           headers=metadata_flavor).text
    zone = requests.get(metadata_server + "zone",
                        headers=metadata_flavor).text.split('/')[-1]

  # logging.info('Getting credentials for GKE cluster %s.' % cluster)
  # subprocess.call(['gcloud', 'container', 'clusters', 'get-credentials', cluster,
  #                  '--zone', zone])

  logging.info('Generating training template.')

  template_file = os.path.join(
      os.path.dirname(os.path.realpath(__file__)), 'tf-serve-template.yaml')
  target_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'tf-serve.yaml')

  with open(template_file, 'r') as f:
    with open(target_file, "w") as target:
      data = f.read()
      changed = data.replace('MODEL_NAME', args.model_name)
      changed1 = changed.replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE)
      changed2 = changed1.replace('MODEL_PATH', args.model_path)
      target.write(changed2)


  logging.info('deploying model serving.')
  subprocess.call(['kubectl', 'create', '-f', '/ml/tf-serve.yaml'])
Example #33
0
def train(args):
    def get_dataset():
        images = []
        labels = []
        local_zip = IO_utils.load_data(args.training_dir, LOCAL_TRAIN_FILE)
        print("zip path = ", local_zip)
        with file_io.FileIO(args.training_dir, 'rb') as f:  # binary mode so ZipFile can read it
            with ZipFile(f, 'r') as archive:
                file_list = archive.infolist()
                random.shuffle(file_list)
                print("image number", len(file_list))
                for entry in file_list:
                    with archive.open(entry) as file:
                        try:
                            open_img = Image.open(file)
                            images.append(np.array(open_img))
                            label = np.zeros(num_classes)
                            label = define_label_one_hot(file.name, label)
                            labels.append(label)
                        except Exception as error:
                            print(error)
        images = np.array(images)
        labels = np.array(labels)
        x_train, x_test, y_train, y_test = train_test_split(images,
                                                            labels,
                                                            test_size=0.1,
                                                            random_state=42)
        x_train = x_train / 255.0
        x_test = x_test / 255.0
        return x_train, x_test, y_train, y_test

    def define_label_one_hot(img, label):
        for c in class_names:
            if c in os.path.basename(img):
                label[class_names.index(c)] = 1
        return label

    def define_label(img, label):
        for c in class_names:
            if c in os.path.basename(img):
                label = class_names.index(c)
        return label

    with file_io.FileIO(args.label_file, 'r') as f:
        class_names = f.read().split(",")
    num_classes = len(class_names)
    input_shape = (args.img_size, args.img_size, args.channel)
    print("labels", class_names)
    print("num_class", num_classes)
    X_train, X_test, y_train, y_test = get_dataset()
    print("Xtrain", X_train.shape)
    print("Ytrain", y_train.shape)
    model = model_helper.init_cnn_model(input_shape, num_classes)
    model.summary()
    # checkpoint
    checkpoint = ModelCheckpoint(LOCAL_MODEL_FILE,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max')
    model.fit(
        X_train,
        y_train,
        batch_size=args.batch_size,
        epochs=args.epochs,
        validation_data=(X_test, y_test),
        callbacks=[checkpoint,
                   model_helper.get_tensorboard_config("logs")],
        verbose=1)
    score = model.evaluate(X_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    print("Predict :", model.predict(X_test[:4]))
    model.save("last-" + LOCAL_MODEL_FILE)
    # predict first 4 images in the test set
    print("Predict :", model.predict(X_test[:4]))
    logs = file_io.list_directory("logs")
    print("logs = ", logs)
    IO_utils.save_file_in_cloud("last-" + LOCAL_MODEL_FILE,
                                args.job_dir + "/" + 'last-' + args.job_name)
    IO_utils.save_file_in_cloud(LOCAL_MODEL_FILE,
                                args.job_dir + "/" + args.job_name)
    for entry in logs:
        IO_utils.save_file_in_cloud("logs/" + entry,
                                    args.job_dir + "/logs/" + entry)
Example #34
0
def create_dir_test():
  """Verifies file_io directory handling methods."""

  # Test directory creation.
  starttime_ms = int(round(time.time() * 1000))
  dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, starttime_ms)
  print("Creating dir %s" % dir_name)
  file_io.create_dir(dir_name)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  print("Created directory in: %d milliseconds" % elapsed_ms)

  # Check that the directory exists.
  dir_exists = file_io.is_directory(dir_name)
  assert dir_exists
  print("%s directory exists: %s" % (dir_name, dir_exists))

  # Test recursive directory creation.
  starttime_ms = int(round(time.time() * 1000))
  recursive_dir_name = "%s/%s/%s" % (dir_name,
                                     "nested_dir1",
                                     "nested_dir2")
  print("Creating recursive dir %s" % recursive_dir_name)
  file_io.recursive_create_dir(recursive_dir_name)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  print("Created directory recursively in: %d milliseconds" % elapsed_ms)

  # Check that the directory exists.
  recursive_dir_exists = file_io.is_directory(recursive_dir_name)
  assert recursive_dir_exists
  print("%s directory exists: %s" % (recursive_dir_name, recursive_dir_exists))

  # Create some contents in the just created directory and list the contents.
  num_files = 10
  files_to_create = ["file_%d.txt" % n for n in range(num_files)]
  for file_num in files_to_create:
    file_name = "%s/%s" % (dir_name, file_num)
    print("Creating file %s." % file_name)
    file_io.write_string_to_file(file_name, "test file.")

  print("Listing directory %s." % dir_name)
  starttime_ms = int(round(time.time() * 1000))
  directory_contents = file_io.list_directory(dir_name)
  print(directory_contents)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  print("Listed directory %s in %s milliseconds" % (dir_name, elapsed_ms))
  assert set(directory_contents) == set(files_to_create + ["nested_dir1/"])

  # Test directory renaming.
  dir_to_rename = "%s/old_dir" % dir_name
  new_dir_name = "%s/new_dir" % dir_name
  file_io.create_dir(dir_to_rename)
  assert file_io.is_directory(dir_to_rename)
  assert not file_io.is_directory(new_dir_name)

  starttime_ms = int(round(time.time() * 1000))
  print("Will try renaming directory %s to %s" % (dir_to_rename, new_dir_name))
  file_io.rename(dir_to_rename, new_dir_name)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  print("Renamed directory %s to %s in %s milliseconds" % (
      dir_to_rename, new_dir_name, elapsed_ms))
  assert not file_io.is_directory(dir_to_rename)
  assert file_io.is_directory(new_dir_name)

  # Test Delete directory recursively.
  print("Deleting directory recursively %s." % dir_name)
  starttime_ms = int(round(time.time() * 1000))
  file_io.delete_recursively(dir_name)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  dir_exists = file_io.is_directory(dir_name)
  assert not dir_exists
  print("Deleted directory recursively %s in %s milliseconds" % (
      dir_name, elapsed_ms))
Example #35
0
def main(argv=None):
    parser = argparse.ArgumentParser(description='ML Trainer')
    parser.add_argument('--model_name', help='...', required=True)

    parser.add_argument('--model_path', help='...', required=True)

    parser.add_argument('--aws_secret', help='...', required=False)

    parser.add_argument('--s3-endpoint', help="""...""", required=False)

    parser.add_argument(
        '--cluster',
        type=str,
        help='GKE cluster set up for kubeflow. If set, zone must be provided. '
        + 'If not set, assuming this runs in a GKE container and current ' +
        'cluster is used.')
    parser.add_argument('--zone',
                        type=str,
                        help='zone of the kubeflow cluster.')
    args = parser.parse_args()

    # KUBEFLOW_NAMESPACE = 'default'
    KUBEFLOW_NAMESPACE = 'kubeflow'

    # Make sure model dir exists before proceeding
    retries = 0
    sleeptime = 5
    while retries < 20:
        try:
            model_dir = os.path.join(
                args.model_path,
                file_io.list_directory(args.model_path)[-1])
            print("model subdir: %s" % model_dir)
            break
        except Exception as e:
            print(e)
            print("Sleeping %s seconds to sync with GCS..." % sleeptime)
            time.sleep(sleeptime)
            retries += 1
            sleeptime *= 2
    if retries >= 20:
        print("could not get model subdir from %s, exiting" % args.model_path)
        exit(1)

    logging.getLogger().setLevel(logging.INFO)
    logging.info('Running deploy-tf-serve.py')

    logging.info('Generating training template.')

    if args.aws_secret:
        template_file = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'tf-serve-creds-template.yaml')
    else:
        template_file = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'tf-serve-template.yaml')
    target_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               'tf-serve.yaml')

    aws_region = os.environ.get('AWS_REGION', 'us-east-1')

    # o = urlparse(args.model_path)
    # if o.scheme == 's3':
    #   client = s3_client(args.s3_endpoint)
    # override model s3 location region and endpoint
    # so tensorflow could access it without troubles
    # aws_region = client.get_bucket_location(
    #                         Bucket=o.netloc
    #                     ).get('LocationConstraint', 'us-east-1')

    with open(template_file, 'r') as f:
        with open(target_file, "w") as target:
            data = f.read()
            changed = data.replace('MODEL_NAME', args.model_name)
            changed = changed.replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE)
            changed = changed.replace('MODEL_PATH', args.model_path)
            changed = changed.replace('BUCKET_REGION', aws_region)
            if args.aws_secret:
                changed = changed.replace('AWS_SECRET_NAME', args.aws_secret)
            target.write(changed)

    logging.info('deploying model serving.')
    subprocess.call(['kubectl', 'create', '-f', '/ml/tf-serve.yaml'])
Example #36
0
def read_image_file_names(dir_path):
    """Returns a list of absolute file paths for relative dir input with all relevant file names."""
    return [
        os.path.join(dir_path, p) for p in file_io.list_directory(dir_path)
    ]
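
A hedged usage sketch of the function above; the directory and the extension filter are placeholders:

image_paths = [
    p for p in read_image_file_names("gs://my-bucket/images")  # placeholder path
    if p.lower().endswith((".jpg", ".jpeg", ".png"))
]
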
Example #37
0
def run_tfma(slice_spec, eval_model_base_dir, tfma_run_dir, input_csv,
             working_dir, mode, project, setup_file, add_metrics_callbacks=None):
    """Does model analysis, using the given spec of how to 'slice', and returns an
    EvalResult that can be used with TFMA visualization functions.
    """

    print("eval model base dir: %s" % eval_model_base_dir)
    # Make sure the model dir exists before proceeding, as sometimes it takes a few seconds to become
    # available after training completes.
    retries = 0
    sleeptime = 5
    while retries < 20:
      try:
        eval_model_dir = os.path.join(
            eval_model_base_dir, file_io.list_directory(eval_model_base_dir)[0])
        print("eval model dir: %s" % eval_model_dir)
        if 'temp' not in eval_model_dir:
          break
        else:
          print("Sleeping %s seconds to sync with GCS..." % sleeptime)
          time.sleep(sleeptime)
          retries += 1
          sleeptime *= 2
      except Exception as e:
        print(e)
        print("Sleeping %s seconds to sync with GCS..." % sleeptime)
        time.sleep(sleeptime)
        retries += 1
        sleeptime *= 2


    schema = taxi.read_schema('schema.pbtxt')

    temp_dir = os.path.join(working_dir, 'tmp')

    if mode == 'local':
      print("mode == local")
      options = {
        'project': project}
      pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)
      runner = 'DirectRunner'
    elif mode == 'cloud':
      print("mode == cloud")
      options = {
        'job_name': 'tfma-' + str(uuid.uuid4()),
        'temp_location': temp_dir,
        'project': project,
        'save_main_session': True,
        'setup_file': setup_file
      }
      pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)
      runner = 'DataFlowRunner'
    else:
      raise ValueError("Invalid mode %s." % mode)

    display_only_data_location = input_csv

    with beam.Pipeline(runner, options=pipeline_options) as pipeline:
      with beam_impl.Context(temp_dir=temp_dir):
        csv_coder = taxi.make_csv_coder(schema)
        raw_data = (
            pipeline
            | 'ReadFromText' >> beam.io.ReadFromText(
                input_csv,
                # coder=beam.coders.BytesCoder(),
                skip_header_lines=1)
            | 'ParseCSV' >> beam.Map(csv_coder.decode))

        # Examples must be in clean tf-example format.
        coder = taxi.make_proto_coder(schema)

        raw_data = (
            raw_data
            # | 'CleanData' >> beam.Map(taxi.clean_raw_data_dict)
            | 'ToSerializedTFExample' >> beam.Map(coder.encode))

        _ = raw_data | 'EvaluateAndWriteResults' >> tfma.EvaluateAndWriteResults(
            eval_saved_model_path=eval_model_dir,
            slice_spec=slice_spec,
            output_path=tfma_run_dir,
            add_metrics_callbacks=add_metrics_callbacks,
            display_only_data_location=input_csv)

    return tfma.load_eval_result(output_path=tfma_run_dir)
Example #38
0
def main():
  parser = argparse.ArgumentParser(description='ML Trainer')
  parser.add_argument(
      '--model_name',
      help='...',
      required=True)

  parser.add_argument(
      '--model_path',
      help='...',
      required=True)

  parser.add_argument('--cluster', type=str,
                      help='GKE cluster set up for kubeflow. If set, zone must be provided. ' +
                           'If not set, assuming this runs in a GKE container and current ' +
                           'cluster is used.')
  parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.')
  args = parser.parse_args()

  KUBEFLOW_NAMESPACE = 'kubeflow'
  ts = str(int(time.time()))

  # Make sure model dir exists before proceeding
  retries = 0
  sleeptime = 5
  while retries < 20:
    try:
      model_dir = os.path.join(args.model_path, file_io.list_directory(args.model_path)[-1])
      print("model subdir: %s" % model_dir)
      break
    except Exception as e:  #pylint: disable=broad-except
      print(e)
      print("Sleeping %s seconds to sync with GCS..." % sleeptime)
      time.sleep(sleeptime)
      retries += 1
      sleeptime *= 2
  if retries >= 20:
    print("could not get model subdir from %s, exiting" % args.model_path)
    exit(1)

  logging.getLogger().setLevel(logging.INFO)
  args_dict = vars(args)
  if args.cluster and args.zone:
    cluster = args_dict.pop('cluster')  #pylint: disable=unused-variable
    zone = args_dict.pop('zone')  #pylint: disable=unused-variable
  else:
    # Get cluster name and zone from metadata
    metadata_server = "http://metadata/computeMetadata/v1/instance/"
    metadata_flavor = {'Metadata-Flavor' : 'Google'}
    cluster = requests.get(metadata_server + "attributes/cluster-name",
                           headers=metadata_flavor).text
    zone = requests.get(metadata_server + "zone",
                        headers=metadata_flavor).text.split('/')[-1]

  # logging.info('Getting credentials for GKE cluster %s.' % cluster)
  # subprocess.call(['gcloud', 'container', 'clusters', 'get-credentials', cluster,
  #                  '--zone', zone])

  logging.info('Generating training template.')

  template_file = os.path.join(
      os.path.dirname(os.path.realpath(__file__)), 'tf-serve-template.yaml')
  target_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'tf-serve.yaml')

  with open(template_file, 'r') as f:
    with open(target_file, "w") as target:
      data = f.read()
      changed = data.replace('MODEL_NAME', args.model_name)
      changed1 = changed.replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE)
      changed2 = changed1.replace('MODEL_PATH', args.model_path)
      changed3 = changed2.replace('SERVICE_NAME', args.model_name + ts)
      target.write(changed3)
      logging.info("template: %s", changed3)


  logging.info('deploying model serving.')
  subprocess.call(['kubectl', 'create', '-f', '/ml/tf-serve.yaml'])
Example #39
0
def main(argv=None):
    parser = argparse.ArgumentParser(description='ML Trainer')
    parser.add_argument('--project',
                        help='The GCS project to use',
                        required=True)
    parser.add_argument(
        '--gcs-path',
        help=
        'The GCS path to the trained model. The path should end with "../export/<model-name>".',
        required=True)
    parser.add_argument('--version-name',
                        help='The model version name.',
                        required=True)

    parser.add_argument('--model-name',
                        help='The model name.',
                        default='taxifare')

    parser.add_argument('--region',
                        help='The model region.',
                        default='us-central1')

    args = parser.parse_args()

    # Make sure the model dir exists before proceeding, as sometimes it takes a few seconds to become
    # available after training completes.
    retries = 0
    sleeptime = 5
    while retries < 20:
        try:
            model_location = os.path.join(
                args.gcs_path,
                file_io.list_directory(args.gcs_path)[-1])
            print("model location: %s" % model_location)
            break
        except Exception as e:
            print(e)
            print("Sleeping %s seconds to wait for GCS files..." % sleeptime)
            time.sleep(sleeptime)
            retries += 1
            sleeptime *= 2
    if retries >= 20:
        print("could not get model location subdir from %s, exiting" %
              args.gcs_path)
        exit(1)

    model_create_command = [
        'gcloud', 'ml-engine', 'models', 'create', args.model_name,
        '--regions', args.region, '--project', args.project
    ]
    print(model_create_command)
    result = subprocess.call(model_create_command)
    print(result)

    proper_version_name = args.version_name.replace('-', '_')
    print("using version name: %s" % proper_version_name)

    model_deploy_command = [
        'gcloud', 'ml-engine', 'versions', 'create', proper_version_name,
        '--model', args.model_name, '--runtime-version', '1.6', '--project',
        args.project, '--origin', model_location
    ]
    print(model_deploy_command)
    result2 = subprocess.call(model_deploy_command)
    print(result2)
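
Several of the examples above poll list_directory with exponential backoff until an exported model directory appears. That pattern can be factored into a small helper; a hedged sketch, not taken from any of these repositories:

import os
import time
from tensorflow.python.lib.io import file_io

def wait_for_export_dir(base_dir, max_retries=20, initial_sleep=5):
    # Poll until list_directory succeeds and returns at least one entry,
    # doubling the sleep between attempts, as the examples above do.
    sleeptime = initial_sleep
    for _ in range(max_retries):
        try:
            entries = file_io.list_directory(base_dir)
            if entries:
                return os.path.join(base_dir, sorted(entries)[-1])
        except Exception as e:  # typically NotFoundError until the dir exists
            print(e)
        print("Sleeping %s seconds to sync with GCS..." % sleeptime)
        time.sleep(sleeptime)
        sleeptime *= 2
    raise RuntimeError("No export found under %s" % base_dir)
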
Example #40
0
      type=int)
  parser.add_argument(
      '--eval-steps',
      help='Number of steps to run evalution for at each checkpoint',
      default=100,
      type=int)
  args = parser.parse_args()

  # Set python level verbosity
  tf.logging.set_verbosity(args.verbosity)
  # Set C++ Graph Execution level verbosity
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(
      tf.logging.__dict__[args.verbosity] / 10)

  train_files = []
  tflist = file_io.list_directory(args.train_files_dir)
  for x in tflist:
    if args.train_files_prefix in x:
      train_files.append(os.path.join(args.train_files_dir, x))
  print("train files list: %s" % train_files)

  eval_files = []
  eflist = file_io.list_directory(args.eval_files_dir)
  for x in eflist:
    if args.eval_files_prefix in x:
      eval_files.append(os.path.join(args.eval_files_dir, x))
  print("eval files list: %s" % eval_files)

  # Run the training job
  hparams = tf.contrib.training.HParams(**args.__dict__)
  run_experiment(train_files, eval_files, hparams)
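
The prefix filtering above can also be expressed with tf.io.gfile.glob (tf.gfile.Glob in TF 1.x), which matches patterns against local or GCS paths directly. A hedged alternative sketch, assuming the prefix appears at the start of the file names:

import os
import tensorflow as tf

train_files = tf.io.gfile.glob(
    os.path.join(args.train_files_dir, args.train_files_prefix + "*"))
eval_files = tf.io.gfile.glob(
    os.path.join(args.eval_files_dir, args.eval_files_prefix + "*"))
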