Example #1
0
 def testStat(self):
     """stat() on a regular file reports length, mtime, and non-directory."""
     file_path = os.path.join(self._base_dir, "temp_file")
     # Close the writer explicitly (context manager) so the contents are
     # flushed to disk before stat() runs; the original relied on GC.
     with file_io.FileIO(file_path, mode="w") as f:
         f.write("testing")
     file_statistics = file_io.stat(file_path)
     os_statistics = os.stat(file_path)
     # "testing" is 7 bytes.
     self.assertEqual(7, file_statistics.length)
     # mtime_nsec is nanoseconds; os.stat reports seconds — compare at
     # whole-second resolution.
     self.assertEqual(int(os_statistics.st_mtime), int(file_statistics.mtime_nsec / 1e9))
     self.assertFalse(file_statistics.is_directory)
Example #2
0
 def testStat(self):
   """stat() reports the written file's length and mtime."""
   file_path = os.path.join(self._base_dir, "temp_file")
   file_io.write_string_to_file(file_path, "testing")
   file_statistics = file_io.stat(file_path)
   os_statistics = os.stat(file_path)
   # assertEquals is a deprecated alias (removed in Python 3.12);
   # use assertEqual.
   self.assertEqual(7, file_statistics.length)
   # mtime_nsec is nanoseconds; os.stat reports seconds.
   self.assertEqual(
       int(os_statistics.st_mtime), int(file_statistics.mtime_nsec / 1e9))
Example #3
0
  def testStat(self):
    """stat() reports length, mtime, and POSIX mode for a new file."""
    file_path = os.path.join(self._base_dir, "temp_file")
    # Close the writer explicitly so the write is flushed before stat().
    with file_io.FileIO(file_path, mode="w") as f:
      f.write("testing")
    file_statistics = file_io.stat(file_path)
    os_statistics = os.stat(file_path)
    # assertEquals is a deprecated alias of assertEqual (removed in 3.12).
    self.assertEqual(7, file_statistics.length)
    # mtime_nsec is nanoseconds; compare at whole-second resolution.
    self.assertEqual(
        int(os_statistics.st_mtime), int(file_statistics.mtime_nsec / 1e9))

    # 644 and 666 are the two possible default permissions of newly-created
    # files.  assertIn gives a better failure message than assertTrue(in).
    self.assertIn(file_statistics.mode, [0o100644, 0o100666])
Example #4
0
 def testIsDirectory(self):
   """is_directory() and stat().is_directory agree on files vs dirs."""
   dir_path = os.path.join(self._base_dir, "test_dir")
   # Failure for a non-existing dir.
   self.assertFalse(file_io.is_directory(dir_path))
   file_io.create_dir(dir_path)
   self.assertTrue(file_io.is_directory(dir_path))
   file_path = os.path.join(dir_path, "test_file")
   # Close the writer explicitly so the file exists on disk before the
   # checks below; the original left the handle to be closed by GC.
   with file_io.FileIO(file_path, mode="w") as f:
     f.write("test")
   # False for a file.
   self.assertFalse(file_io.is_directory(file_path))
   # Test that the value returned from `stat()` has `is_directory` set.
   file_statistics = file_io.stat(dir_path)
   self.assertTrue(file_statistics.is_directory)
Example #5
0
 def testIsDirectory(self, join):
     """Like testIsDirectory, parameterized over the path-join callable."""
     dir_path = join(self._base_dir, "test_dir")
     # Failure for a non-existing dir.
     self.assertFalse(file_io.is_directory(dir_path))
     file_io.create_dir(dir_path)
     self.assertTrue(file_io.is_directory(dir_path))
     # str() because `join` may return a non-str path object.
     file_path = join(str(dir_path), "test_file")
     # Close the writer explicitly so the write is flushed before the
     # checks below; the original relied on GC to close the handle.
     with file_io.FileIO(file_path, mode="w") as f:
         f.write("test")
     # False for a file.
     self.assertFalse(file_io.is_directory(file_path))
     # Test that the value returned from `stat()` has `is_directory` set.
     file_statistics = file_io.stat(dir_path)
     self.assertTrue(file_statistics.is_directory)
Example #6
0
    def load(self) -> T5ForConditionalGeneration:
        """Load the T5 model, first syncing TF checkpoint files from
        `self.url` into `self.model_cache_dir` when missing or stale.

        Returns:
            The model produced by `self._fix_t5_model(...)`.
        """
        # Fast path: reuse the cached weights unless a refresh was requested.
        try:
            if not self.flush_cache:
                return self._fix_t5_model(
                    T5ForConditionalGeneration.from_pretrained(
                        str(self.model_cache_dir),
                        from_tf=True,
                        force_download=False))
        except (RuntimeError, OSError):
            # from_pretrained fails when the cache dir is absent/incomplete;
            # fall through to (re)populate the cache.
            logging.info('T5 model weights not in cache.')
        # ckpt_prefix is expected to contain a line like
        #   model_checkpoint_path: "<name>"
        # (the TF `checkpoint` metadata format) — extract <name>.
        m = re.search(r'model_checkpoint_path: "(.+?)"', self.ckpt_prefix)
        assert m is not None, 'checkpoint file malformed'

        # Copy over checkpoint data
        # Match only the shard/index/meta files belonging to that checkpoint.
        ckpt_patt = re.compile(
            rf'^{m.group(1)}\.(data-\d+-of-\d+|index|meta)$')
        for name in file_io.list_directory(self.url):
            if not ckpt_patt.match(name):
                continue
            url = os.path.join(self.url, name)
            url_stat = file_io.stat(url)
            # Rename with TRANSFO_PREFIX — presumably the filename prefix
            # Transformers expects for TF checkpoints (defined elsewhere).
            cache_file_path = self.model_cache_dir / ckpt_patt.sub(
                rf'{TRANSFO_PREFIX}.\1', name)
            try:
                # Skip a file already cached with matching size and an mtime
                # newer than the remote copy (unless flushing the cache).
                # Note: stat length is bytes; mtime_nsec is nanoseconds.
                cs = os.stat(str(cache_file_path))
                if cs.st_size == url_stat.length and cs.st_mtime_ns > url_stat.mtime_nsec and not self.flush_cache:
                    logging.info(f'Skipping {name}...')
                    continue
            except FileNotFoundError:
                # Not cached yet — fall through to the copy below.
                pass
            logging.info(f'Caching {name}...')
            file_io.copy(url, str(cache_file_path), overwrite=True)

        # Transformers expects a model config.json
        config = T5Config.from_pretrained(self.model_type)
        with open(str(self.model_cache_dir / 'config.json'), 'w') as f:
            json.dump(config.__dict__, f, indent=4)
        return self._fix_t5_model(
            T5ForConditionalGeneration.from_pretrained(str(
                self.model_cache_dir),
                                                       from_tf=True,
                                                       force_download=False))
Example #7
0
 def match_maybe_append(pathname):
   """If `pathname` matches any files, record the first match's mtime
   (in seconds) in the enclosing `mtimes` list and return True."""
   matches = file_io.get_matching_files(pathname)
   if not matches:
     return False
   # mtime_nsec is nanoseconds; store seconds.
   mtimes.append(file_io.stat(matches[0]).mtime_nsec / 1e9)
   return True
Example #8
0
        # NOTE(review): fragment — the loop header that defines `dcm_obj`
        # and `mean_intensity` is outside this view.
        dcm_data = pydicom.read_file(BytesIO(dcm_obj['Body'].read()))
        im = dcm_data.pixel_array
        mean_intensity.append(im.mean())

# NOTE(review): value computed and discarded — looks like a leftover
# interactive-session line; confirm before removing.
im.mean()

# Parse patient labels and bounding boxes into dictionary
# parsed_df = ingest.parse_training_labels(
#     train_box_df=train_box_df,
#     train_image_dirpath=S3_STAGE1_TRAIN_IMAGE_DIR)
# print(parsed_df['0004cfab-14fd-4e49-80ba-63a80b6bddd6'])
# print(parsed_df['00436515-870c-4b36-a041-de91049b9ab4'])

# Visualize bounding boxes for single patientId
# ingest.draw(parsed_df=parsed_df,
#             patient_id='00436515-870c-4b36-a041-de91049b9ab4')

# Check that TensorFlow can read the S3 files
from tensorflow.python.lib.io import file_io
print(file_io.stat(S3_CLASS_INFO_PATH))

filenames = [
    "s3://lungbox/raw/stage_1_test_images/000924cf-0f8d-42bd-9158-1af53881a557.dcm",
    "s3://lungbox/raw/stage_1_test_images/000db696-cf54-4385-b10b-6b16fbb3f985.dcm",
    "s3://lungbox/raw/stage_1_test_images/000fe35a-2649-43d4-b027-e67796d412e0.dcm",
    "s3://lungbox/raw/stage_1_test_images/001031d9-f904-4a23-b3e5-2c088acd19c6.dcm",
    "s3://lungbox/raw/stage_1_test_images/0010f549-b242-4e94-87a8-57d79de215fc.dcm"
]
# NOTE(review): these paths are DICOM files, not TFRecords — it is unclear
# TFRecordDataset can parse them; confirm intent.
dataset = tf.data.TFRecordDataset(filenames)
print(dataset)
 def match_maybe_append(pathname):
   """Append the first matching file's mtime (seconds) to `mtimes`.

   Returns True when `pathname` matched at least one file, else False.
   """
   hits = file_io.get_matching_files(pathname)
   if not hits:
     return False
   # Convert nanoseconds to seconds before recording.
   mtimes.append(file_io.stat(hits[0]).mtime_nsec / 1e9)
   return True
Example #10
0
# To test your setup
from tensorflow.python.lib.io import file_io

# Fixed: the original used the Python 2 `print` statement, which is a
# SyntaxError under Python 3 — print is a function.
print(file_io.stat('s3://bucketname/path/'))

# <tensorflow.python.pywrap_tensorflow_internal.FileStatistics;
# proxy of <Swig Object of type 'tensorflow::FileStatistics *'
# at 0x10c2171b0> >

# Reading Data
filenames = [
    "s3://bucketname/path/to/file1.tfrecord",
    "s3://bucketname/path/to/file2.tfrecord"
]

dataset = tf.data.TFRecordDataset(filenames)
Example #11
0
 def _more_recent_emb_file_exists(self, request):
     """True iff a cached embedding file for `request.id` exists and is at
     least as new as the request's creation timestamp."""
     filepath = self._get_filepath(request.id)
     if file_io.file_exists(filepath):
         # mtime_nsec is nanoseconds; convert to seconds for comparison.
         file_ts = file_io.stat(filepath).mtime_nsec / 1000000000
         return file_ts >= request.created_at_ts
     return False
Example #12
0
 def get_mtime(filepath):
     """Return the file's mtime in nanoseconds, or None if it is absent."""
     if not file_io.file_exists(filepath):
         return None
     return file_io.stat(filepath).mtime_nsec
        },
        secret_key_secret={
            "name": "mlpipeline-minio-artifact",
            "key": "accesssecret"
        })

    # artifacts in this op are stored to endpoint `minio-service.<namespace>:9000`
    op = dsl.ContainerOp(name="foo",
                         image="busybox:%s" % tag,
                         artifact_location=pipeline_artifact_location)


if __name__ == '__main__':
    # Point TF's S3 filesystem at the in-cluster MinIO service over plain
    # HTTP; these env vars must be set before file_io is imported below.
    import os
    os.environ["AWS_ACCESS_KEY_ID"] = "minio"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "minio123"
    os.environ["S3_USE_HTTPS"] = "0"
    os.environ["S3_VERIFY_SSL"] = "0"
    os.environ["S3_ENDPOINT"] = "minio-service.kubeflow:9000"
    os.environ["S3_REQUEST_TIMEOUT_MSEC"] = "600000"

    from tensorflow.python.lib.io import file_io

    # Smoke test: stat the bucket to verify the S3/MinIO configuration.
    print(file_io.stat('s3://orain/'))
    output = '/home/jovyan/data-vol-1/mnist/pipeline-test-minio-fix9/7692858b-824f-4cf9-b286-0e50a34e4b1d/tfx-taxi-cab-classification-pipeline-example-tt6s6-902552594/data/'

    # NOTE(review): `output` is created but not used afterwards in this view.
    if not os.path.exists(output):
        os.makedirs(output)

    # Compile the pipeline function `custom_artifact_location` (defined
    # outside this view) into a deployable archive next to this script.
    kfp.compiler.Compiler().compile(custom_artifact_location,
                                    __file__ + '.zip')