def testStat(self):
    file_path = os.path.join(self._base_dir, "temp_file")
    file_io.FileIO(file_path, mode="w").write("testing")
    file_statistics = file_io.stat(file_path)
    os_statistics = os.stat(file_path)
    self.assertEqual(7, file_statistics.length)
    self.assertEqual(
        int(os_statistics.st_mtime), int(file_statistics.mtime_nsec / 1e9))
    self.assertFalse(file_statistics.is_directory)
def testStat(self):
    file_path = os.path.join(self._base_dir, "temp_file")
    file_io.write_string_to_file(file_path, "testing")
    file_statistics = file_io.stat(file_path)
    os_statistics = os.stat(file_path)
    self.assertEqual(7, file_statistics.length)
    self.assertEqual(
        int(os_statistics.st_mtime), int(file_statistics.mtime_nsec / 1e9))
def testStat(self):
    file_path = os.path.join(self._base_dir, "temp_file")
    file_io.FileIO(file_path, mode="w").write("testing")
    file_statistics = file_io.stat(file_path)
    os_statistics = os.stat(file_path)
    self.assertEqual(7, file_statistics.length)
    self.assertEqual(
        int(os_statistics.st_mtime), int(file_statistics.mtime_nsec / 1e9))
    # 644 and 666 are the two possible default permissions of
    # newly-created files.
    self.assertIn(file_statistics.mode, [0o100644, 0o100666])
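The three testStat variants above exercise the same FileStatistics fields. A minimal sketch of the returned object, assuming a regular file at the hypothetical path /tmp/example:

from tensorflow.python.lib.io import file_io

stats = file_io.stat('/tmp/example')  # hypothetical path
print(stats.length)        # size in bytes
print(stats.mtime_nsec)    # modification time in nanoseconds
print(stats.is_directory)  # False for a regular file
print(stats.mode)          # permission bits, e.g. 0o100644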
def testIsDirectory(self):
    dir_path = os.path.join(self._base_dir, "test_dir")
    # Failure for a non-existing dir.
    self.assertFalse(file_io.is_directory(dir_path))
    file_io.create_dir(dir_path)
    self.assertTrue(file_io.is_directory(dir_path))
    file_path = os.path.join(dir_path, "test_file")
    file_io.FileIO(file_path, mode="w").write("test")
    # False for a file.
    self.assertFalse(file_io.is_directory(file_path))
    # Test that the value returned from `stat()` has `is_directory` set.
    file_statistics = file_io.stat(dir_path)
    self.assertTrue(file_statistics.is_directory)
def testIsDirectory(self, join):
    dir_path = join(self._base_dir, "test_dir")
    # Failure for a non-existing dir.
    self.assertFalse(file_io.is_directory(dir_path))
    file_io.create_dir(dir_path)
    self.assertTrue(file_io.is_directory(dir_path))
    file_path = join(str(dir_path), "test_file")
    file_io.FileIO(file_path, mode="w").write("test")
    # False for a file.
    self.assertFalse(file_io.is_directory(file_path))
    # Test that the value returned from `stat()` has `is_directory` set.
    file_statistics = file_io.stat(dir_path)
    self.assertTrue(file_statistics.is_directory)
def load(self) -> T5ForConditionalGeneration:
    try:
        if not self.flush_cache:
            return self._fix_t5_model(
                T5ForConditionalGeneration.from_pretrained(
                    str(self.model_cache_dir),
                    from_tf=True,
                    force_download=False))
    except (RuntimeError, OSError):
        logging.info('T5 model weights not in cache.')
    m = re.search(r'model_checkpoint_path: "(.+?)"', self.ckpt_prefix)
    assert m is not None, 'checkpoint file malformed'
    # Copy over checkpoint data.
    ckpt_patt = re.compile(rf'^{m.group(1)}\.(data-\d+-of-\d+|index|meta)$')
    for name in file_io.list_directory(self.url):
        if not ckpt_patt.match(name):
            continue
        url = os.path.join(self.url, name)
        url_stat = file_io.stat(url)
        cache_file_path = self.model_cache_dir / ckpt_patt.sub(
            rf'{TRANSFO_PREFIX}.\1', name)
        try:
            cs = os.stat(str(cache_file_path))
            # Skip the copy if the cached file is the same size and newer
            # than the remote checkpoint shard.
            if (cs.st_size == url_stat.length and
                    cs.st_mtime_ns > url_stat.mtime_nsec and
                    not self.flush_cache):
                logging.info(f'Skipping {name}...')
                continue
        except FileNotFoundError:
            pass
        logging.info(f'Caching {name}...')
        file_io.copy(url, str(cache_file_path), overwrite=True)
    # Transformers expects a model config.json.
    config = T5Config.from_pretrained(self.model_type)
    with open(str(self.model_cache_dir / 'config.json'), 'w') as f:
        json.dump(config.__dict__, f, indent=4)
    return self._fix_t5_model(
        T5ForConditionalGeneration.from_pretrained(
            str(self.model_cache_dir), from_tf=True, force_download=False))
def match_maybe_append(pathname):
    fnames = file_io.get_matching_files(pathname)
    if fnames:
        mtimes.append(file_io.stat(fnames[0]).mtime_nsec / 1e9)
        return True
    return False
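A minimal usage sketch for the helper above, assuming `mtimes` is a list defined in the enclosing scope and `patterns` is a hypothetical list of glob patterns; the helper appends the first match's modification time in seconds:

mtimes = []
patterns = ['/tmp/ckpt/model.ckpt-*.index']  # hypothetical
for pattern in patterns:
    if not match_maybe_append(pattern):
        print('no files matched %s' % pattern)
print(mtimes)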
dcm_data = pydicom.read_file(BytesIO(dcm_obj['Body'].read()))
im = dcm_data.pixel_array
mean_intensity.append(im.mean())

# Parse patient labels and bounding boxes into dictionary
# parsed_df = ingest.parse_training_labels(
#     train_box_df=train_box_df,
#     train_image_dirpath=S3_STAGE1_TRAIN_IMAGE_DIR)
# print(parsed_df['0004cfab-14fd-4e49-80ba-63a80b6bddd6'])
# print(parsed_df['00436515-870c-4b36-a041-de91049b9ab4'])

# Visualize bounding boxes for single patientId
# ingest.draw(parsed_df=parsed_df,
#             patient_id='00436515-870c-4b36-a041-de91049b9ab4')

# Check that TensorFlow can read the S3 files
from tensorflow.python.lib.io import file_io
print(file_io.stat(S3_CLASS_INFO_PATH))

filenames = [
    "s3://lungbox/raw/stage_1_test_images/000924cf-0f8d-42bd-9158-1af53881a557.dcm",
    "s3://lungbox/raw/stage_1_test_images/000db696-cf54-4385-b10b-6b16fbb3f985.dcm",
    "s3://lungbox/raw/stage_1_test_images/000fe35a-2649-43d4-b027-e67796d412e0.dcm",
    "s3://lungbox/raw/stage_1_test_images/001031d9-f904-4a23-b3e5-2c088acd19c6.dcm",
    "s3://lungbox/raw/stage_1_test_images/0010f549-b242-4e94-87a8-57d79de215fc.dcm"
]
dataset = tf.data.TFRecordDataset(filenames)
print(dataset)
# To test your setup
from tensorflow.python.lib.io import file_io
print(file_io.stat('s3://bucketname/path/'))
# <tensorflow.python.pywrap_tensorflow_internal.FileStatistics;
#  proxy of <Swig Object of type 'tensorflow::FileStatistics *'
#  at 0x10c2171b0> >

# Reading Data
filenames = [
    "s3://bucketname/path/to/file1.tfrecord",
    "s3://bucketname/path/to/file2.tfrecord"
]
dataset = tf.data.TFRecordDataset(filenames)
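A short follow-up sketch, assuming TensorFlow 2.x eager execution and that the S3 files above exist: constructing the dataset is lazy and does not touch the files, so iterate a couple of records to confirm the S3 reads actually succeed.

for record in dataset.take(2):
    print(len(record.numpy()))  # size in bytes of each serialized record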
def _more_recent_emb_file_exists(self, request):
    filepath = self._get_filepath(request.id)
    if not file_io.file_exists(filepath):
        return False
    file_ts = file_io.stat(filepath).mtime_nsec / 1000000000
    return file_ts >= request.created_at_ts
def get_mtime(filepath):
    if file_io.file_exists(filepath):
        return file_io.stat(filepath).mtime_nsec
    return None
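A minimal usage sketch for get_mtime, with hypothetical paths; note that the returned mtime_nsec is in nanoseconds, not seconds:

src_mtime = get_mtime('/data/input.tfrecord')    # hypothetical path
out_mtime = get_mtime('/data/output.tfrecord')   # hypothetical path
if src_mtime is not None and (out_mtime is None or src_mtime > out_mtime):
    print('output is stale; regenerate it')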
    },
    secret_key_secret={
        "name": "mlpipeline-minio-artifact",
        "key": "accesssecret"
    })

# artifacts in this op are stored to endpoint `minio-service.<namespace>:9000`
op = dsl.ContainerOp(name="foo",
                     image="busybox:%s" % tag,
                     artifact_location=pipeline_artifact_location)


if __name__ == '__main__':
    import os
    os.environ["AWS_ACCESS_KEY_ID"] = "minio"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "minio123"
    os.environ["S3_USE_HTTPS"] = "0"
    os.environ["S3_VERIFY_SSL"] = "0"
    os.environ["S3_ENDPOINT"] = "minio-service.kubeflow:9000"
    os.environ["S3_REQUEST_TIMEOUT_MSEC"] = "600000"
    from tensorflow.python.lib.io import file_io
    print(file_io.stat('s3://orain/'))

    output = '/home/jovyan/data-vol-1/mnist/pipeline-test-minio-fix9/7692858b-824f-4cf9-b286-0e50a34e4b1d/tfx-taxi-cab-classification-pipeline-example-tt6s6-902552594/data/'
    if not os.path.exists(output):
        os.makedirs(output)

    kfp.compiler.Compiler().compile(custom_artifact_location, __file__ + '.zip')