def to_literal(
    self,
    ctx: FlyteContext,
    python_val: FlyteDirectory,
    python_type: typing.Type[FlyteDirectory],
    expected: LiteralType,
) -> Literal:
    """Convert a FlyteDirectory (or a str/os.PathLike directory path) into a multipart Blob Literal.

    Remote paths (or values whose ``remote_directory`` is ``False``) are passed through as-is;
    local directories are uploaded to the Flyte store first.

    :param ctx: active FlyteContext, used for file-access operations
    :param python_val: a FlyteDirectory, or a string / os.PathLike path to a directory
    :param python_type: the declared FlyteDirectory type (used only to derive the blob format)
    :param expected: the expected LiteralType (unused by this implementation)
    :raises AssertionError: if the value is not a FlyteDirectory/path, or a local path is not a directory
    """
    remote_directory = None
    should_upload = True

    # There are two kinds of literals we handle, either an actual FlyteDirectory, or a string path to a directory.
    # Handle the FlyteDirectory case
    if isinstance(python_val, FlyteDirectory):
        source_path = python_val.path
        if python_val.remote_directory is False:
            # If the user specified the remote_path to be False, that means no matter what, do not upload
            should_upload = False
        else:
            # Otherwise, if not an "" use the user-specified remote path instead of the random one
            remote_directory = python_val.remote_directory or None
    # Handle the string case
    else:
        if not (isinstance(python_val, os.PathLike) or isinstance(python_val, str)):
            raise AssertionError(f"Expected FlyteDirectory or os.PathLike object, received {type(python_val)}")
        source_path = python_val

    # Only do this check if it's a local directory.
    if not ctx.file_access.is_remote(source_path):
        p = Path(source_path)
        if not p.is_dir():
            # BUGFIX: the original message string contained a raw line break inside a single-quoted
            # f-string (a syntax error); collapsed to one line, matching the newer implementation.
            raise AssertionError(f"Expected a directory. {source_path} is not a directory")

    # For remote values, say s3://some/extant/dir/, we will not upload to Flyte's store (S3/GCS)
    # and just return a literal with a uri equal to the path given
    if ctx.file_access.is_remote(source_path) or not should_upload:
        meta = BlobMetadata(type=self._blob_type(format=self.get_format(python_type)))
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=source_path)))
    # For local paths, we will upload to the Flyte store (note that for local execution, the remote store is just
    # a subfolder), unless remote_path=False was given
    else:
        if remote_directory is None:
            remote_directory = ctx.file_access.get_random_remote_directory()
        ctx.file_access.put_data(source_path, remote_directory, is_multipart=True)
        meta = BlobMetadata(type=self._blob_type(format=self.get_format(python_type)))
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_directory)))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: typing.Union[FlyteFile, os.PathLike, str],
    python_type: typing.Type[FlyteFile],
    expected: LiteralType,
) -> Literal:
    """Convert a FlyteFile (or a str/os.PathLike file path) into a single Blob Literal.

    A FlyteFile that already carries a remote source is converted straight back to a Blob.
    Otherwise, remote paths (or values whose ``remote_path`` is ``False``) are passed through
    as the literal uri; local paths are uploaded to the Flyte store first.

    :param ctx: active FlyteContext, used for file-access operations
    :param python_val: a FlyteFile, or a string / os.PathLike path to a file
    :param python_type: the declared FlyteFile type (used only to derive the blob format)
    :param expected: the expected LiteralType (unused by this implementation)
    :raises AssertionError: if the value is None or not a FlyteFile/path-like object
    """
    remote_path = None
    should_upload = True

    if python_val is None:
        raise AssertionError("None value cannot be converted to a file.")
    if isinstance(python_val, FlyteFile):
        # If the object has a remote source, then we just convert it back.
        if python_val._remote_source is not None:
            meta = BlobMetadata(type=self._blob_type(format=self.get_format(python_type)))
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=python_val._remote_source)))
        source_path = python_val.path
        if python_val.remote_path is False:
            # If the user specified the remote_path to be False, that means no matter what, do not upload
            should_upload = False
        else:
            # Otherwise, if not an "" use the user-specified remote path instead of the random one
            remote_path = python_val.remote_path or None
    else:
        if not (isinstance(python_val, os.PathLike) or isinstance(python_val, str)):
            raise AssertionError(f"Expected FlyteFile or os.PathLike object, received {type(python_val)}")
        source_path = python_val

    # For remote values, say https://raw.github.com/demo_data.csv, we will not upload to Flyte's store (S3/GCS)
    # and just return a literal with a uri equal to the path given
    if ctx.file_access.is_remote(source_path) or not should_upload:
        # TODO: Add copying functionality so that FlyteFile(path="s3://a", remote_path="s3://b") will copy.
        meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type)))
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=source_path)))
    # For local paths, we will upload to the Flyte store (note that for local execution, the remote store is just
    # a subfolder), unless remote_path=False was given
    else:
        if remote_path is None:
            remote_path = ctx.file_access.get_random_remote_path(source_path)
        ctx.file_access.put_data(source_path, remote_path, is_multipart=False)
        meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type)))
        # NOTE(review): remote_path is always non-None at this point, so the "or source_path"
        # fallback below looks like dead code — confirm before removing.
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_path or source_path)))
def to_literal(
    self, ctx: FlyteContext, python_val: os.PathLike, python_type: Type[os.PathLike], expected: LiteralType
) -> Literal:
    """Convert a path-like value into a Blob Literal, uploading local files to the Flyte store.

    :param ctx: active FlyteContext, used for file-access operations
    :param python_val: a path to a file; may be local or already remote
    :param python_type: the declared os.PathLike type (unused by this implementation)
    :param expected: the expected LiteralType; its ``blob`` field supplies the blob metadata type
    """
    # TODO we could guess the mimetype and allow the format to be changed at runtime. thus a non existent format
    # could be replaced with a guess format?
    # For remote values, say https://raw.github.com/demo_data.csv, we will not upload to Flyte's store (S3/GCS)
    # and just return a literal with a uri equal to the path given
    if ctx.file_access.is_remote(python_val):
        return Literal(scalar=Scalar(blob=Blob(metadata=BlobMetadata(expected.blob), uri=python_val)))
    # For local files, we'll upload for the user. The random remote destination is only generated here,
    # on the upload path (the original generated it unconditionally, even when the remote branch above
    # returned without ever using it).
    rpath = ctx.file_access.get_random_remote_path()
    ctx.file_access.put_data(python_val, rpath, is_multipart=False)
    return Literal(scalar=Scalar(blob=Blob(metadata=BlobMetadata(expected.blob), uri=rpath)))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: TensorFlow2ONNX,
    python_type: Type[TensorFlow2ONNX],
    expected: LiteralType,
) -> Literal:
    """Serialize the wrapped TensorFlow model to ONNX, upload it, and return a single-blob Literal.

    :raises TypeTransformerFailedError: when the type carries no conversion config
    """
    python_type, config = extract_config(python_type)
    # Without an attached config there is nothing to drive the ONNX conversion — fail fast.
    if not config:
        raise TypeTransformerFailedError(f"{python_type}'s config is None")

    upload_uri = ctx.file_access.get_random_remote_path()
    onnx_local_path = to_onnx(ctx, python_val.model, config.__dict__.copy())
    ctx.file_access.put_data(onnx_local_path, upload_uri, is_multipart=False)

    blob_meta = BlobMetadata(
        type=BlobType(format=self.ONNX_FORMAT, dimensionality=BlobType.BlobDimensionality.SINGLE)
    )
    return Literal(scalar=Scalar(blob=Blob(uri=upload_uri, metadata=blob_meta)))
def convert_to_blob(
    self,
    ctx: typing.Optional[click.Context],
    param: typing.Optional[click.Parameter],
    value: typing.Union[Directory, FileParam],
) -> Literal:
    """Turn a CLI-provided Directory or FileParam into a Blob Literal.

    For a local file destined for a remote backend, the file is uploaded through the signed
    upload URL and the literal points at the resulting native URL.
    """
    if isinstance(value, Directory):
        uri = self.get_uri_for_dir(value)
    else:
        uri = value.filepath
        if self._remote and value.local:
            # Local file + remote execution: push the bytes up via the pre-signed URL.
            local_file = pathlib.Path(value.filepath)
            content_md5, _ = script_mode.hash_file(value.filepath)
            upload_info = self._create_upload_fn(filename=local_file.name, content_md5=content_md5)
            self._flyte_ctx.file_access.put_data(local_file, upload_info.signed_url)
            uri = upload_info.native_url

    blob = Blob(metadata=BlobMetadata(type=self._literal_type.blob), uri=uri)
    return Literal(scalar=Scalar(blob=blob))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: DatasetProfileView,
    python_type: Type[DatasetProfileView],
    expected: LiteralType,
) -> Literal:
    """Write the profile view to a local scratch path, upload it, and return the Blob Literal."""
    upload_to = ctx.file_access.get_random_remote_directory()
    # Scratch location on local disk that the profile view is serialized into before upload.
    scratch_path = ctx.file_access.get_random_local_path()
    python_val.write(scratch_path)
    ctx.file_access.upload(scratch_path, upload_to)
    blob = Blob(uri=upload_to, metadata=BlobMetadata(type=self._TYPE_INFO))
    return Literal(scalar=Scalar(blob=blob))
def test_file_format_getting_python_value():
    """A txt-format Blob literal should hydrate into a FlyteFile whose extension() is "txt"."""
    transformer = TypeEngine.get_transformer(FlyteFile)
    ctx = FlyteContext.current_context()

    # This file probably won't exist, but it's okay. It won't be downloaded unless we try to read the thing returned
    blob_meta = BlobMetadata(type=BlobType(format="txt", dimensionality=0))
    lv = Literal(scalar=Scalar(blob=Blob(metadata=blob_meta, uri="file:///tmp/test")))

    pv = transformer.to_python_value(ctx, lv, expected_python_type=FlyteFile["txt"])
    assert isinstance(pv, FlyteFile)
    assert pv.extension() == "txt"
def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
    """Pickle an arbitrary Python value with cloudpickle, upload it, and return a single-blob Literal."""
    blob_meta = BlobMetadata(
        type=_core_types.BlobType(
            format=self.PYTHON_PICKLE_FORMAT,
            dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE,
        )
    )

    # Dump the task output into pickle
    scratch_dir = ctx.file_access.get_random_local_directory()
    os.makedirs(scratch_dir, exist_ok=True)
    scratch_file = ctx.file_access.get_random_local_path()
    # NOTE(review): if get_random_local_path() returns an absolute path, os.path.join discards
    # scratch_dir entirely, making the directory created above unused — confirm intent.
    uri = os.path.join(scratch_dir, scratch_file)
    with open(uri, "w+b") as outfile:
        cloudpickle.dump(python_val, outfile)

    upload_uri = ctx.file_access.get_random_remote_path(uri)
    ctx.file_access.put_data(uri, upload_uri, is_multipart=False)
    return Literal(scalar=Scalar(blob=Blob(metadata=blob_meta, uri=upload_uri)))
def to_literal(
    self, ctx: FlyteContext, python_val: np.ndarray, python_type: Type[np.ndarray], expected: LiteralType
) -> Literal:
    """Persist a numpy array to a local .npy file, upload it, and return the corresponding Blob Literal."""
    blob_meta = BlobMetadata(
        type=_core_types.BlobType(
            format=self.NUMPY_ARRAY_FORMAT,
            dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE,
        )
    )

    npy_path = ctx.file_access.get_random_local_path() + ".npy"
    pathlib.Path(npy_path).parent.mkdir(parents=True, exist_ok=True)

    # save numpy array to a file
    # allow_pickle=False prevents numpy from trying to save object arrays (dtype=object) using pickle
    np.save(file=npy_path, arr=python_val, allow_pickle=False)

    upload_uri = ctx.file_access.get_random_remote_path(npy_path)
    ctx.file_access.put_data(npy_path, upload_uri, is_multipart=False)
    return Literal(scalar=Scalar(blob=Blob(metadata=blob_meta, uri=upload_uri)))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: MyDataset,
    python_type: Type[MyDataset],
    expected: LiteralType,
) -> Literal:
    """
    This method is used to convert from given python type object ``MyDataset`` to the Literal representation
    """
    # Step 1: lets upload all the data into a remote place recommended by Flyte
    destination = ctx.file_access.get_random_remote_directory()
    ctx.file_access.upload_directory(python_val.base_dir, destination)
    # Step 2: lets return a pointer to this remote_dir in the form of a literal
    blob = Blob(uri=destination, metadata=BlobMetadata(type=self._TYPE_INFO))
    return Literal(scalar=Scalar(blob=blob))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: PyTorchCheckpoint,
    python_type: Type[PyTorchCheckpoint],
    expected: LiteralType,
) -> Literal:
    """Collect module/optimizer state dicts plus hyperparameters into one dict, torch.save it, and upload.

    :raises TypeTransformerFailedError: if nothing savable was found on the checkpoint object
    """
    blob_meta = BlobMetadata(
        type=_core_types.BlobType(
            format=self.PYTORCH_CHECKPOINT_FORMAT,
            dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE,
        )
    )

    checkpoint_path = ctx.file_access.get_random_local_path() + ".pt"
    pathlib.Path(checkpoint_path).parent.mkdir(parents=True, exist_ok=True)

    payload = {}
    for f in fields(python_val):
        attr = getattr(python_val, f.name)
        if not attr:
            continue
        if f.name in ["module", "optimizer"]:
            # Persist the state dict rather than the live object.
            payload[f.name + "_state_dict"] = attr.state_dict()
        elif f.name == "hyperparameters":
            # Hyperparameters may arrive as a dict, a NamedTuple, or a dataclass; flatten into payload.
            if isinstance(attr, dict):
                payload.update(attr)
            elif isinstance(attr, tuple):
                payload.update(attr._asdict())
            elif is_dataclass(attr):
                payload.update(asdict(attr))

    if not payload:
        raise TypeTransformerFailedError(f"Cannot save empty {python_val}")

    # save checkpoint to a file
    torch.save(payload, checkpoint_path)

    upload_uri = ctx.file_access.get_random_remote_path(checkpoint_path)
    ctx.file_access.put_data(checkpoint_path, upload_uri, is_multipart=False)
    return Literal(scalar=Scalar(blob=Blob(metadata=blob_meta, uri=upload_uri)))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: FlyteDirectory,
    python_type: typing.Type[FlyteDirectory],
    expected: LiteralType,
) -> Literal:
    """Convert a FlyteDirectory (or a str/pathlib.Path pointing at a directory) into a multipart Blob Literal.

    Values with an existing remote source round-trip straight back to a Blob. Otherwise, local
    directories are uploaded unless uploading was explicitly disabled, while already-remote paths
    are passed through as the literal uri.

    :raises ValueError: when a local string/Path value does not point at a directory
    :raises AssertionError: when the value is not a FlyteDirectory, str, or pathlib.Path
    """
    upload_destination = None
    needs_upload = True
    meta = BlobMetadata(type=self._blob_type(format=self.get_format(python_type)))

    if isinstance(python_val, FlyteDirectory):
        # If the object has a remote source, then we just convert it back.
        if python_val._remote_source is not None:
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=python_val._remote_source)))
        source_path = python_val.path
        # remote_directory=False means "never upload"; a path that is already remote also needs no
        # upload, since uploading to the Flyte blob store makes no sense for it.
        if python_val.remote_directory is False or ctx.file_access.is_remote(source_path):
            needs_upload = False
        # A user-supplied destination (anything truthy) overrides the random one generated below.
        upload_destination = python_val.remote_directory or None
    elif isinstance(python_val, (pathlib.Path, str)):
        source_path = str(python_val)
        if ctx.file_access.is_remote(source_path):
            needs_upload = False
        elif not Path(source_path).is_dir():
            raise ValueError(f"Expected a directory. {source_path} is not a directory")
    else:
        raise AssertionError(f"Expected FlyteDirectory or os.PathLike object, received {type(python_val)}")

    # If not uploading, then we can only take the original source path as the uri.
    if not needs_upload:
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=source_path)))

    # If we're uploading something, that means that the uri should always point to the upload destination.
    if upload_destination is None:
        upload_destination = ctx.file_access.get_random_remote_directory()
    ctx.file_access.put_data(source_path, upload_destination, is_multipart=True)
    return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=upload_destination)))
def to_literal(
    self,
    ctx: FlyteContext,
    python_val: typing.Union[FlyteFile, os.PathLike, str],
    python_type: typing.Type[FlyteFile],
    expected: LiteralType,
) -> Literal:
    """Convert a FlyteFile (or a str/pathlib.Path file path) into a single Blob Literal.

    A FlyteFile that already carries a remote source round-trips straight back to a Blob.
    Otherwise, local files are uploaded to the Flyte store unless uploading was explicitly
    disabled (``remote_path=False``, an already-remote path, or a plain ``os.PathLike``
    declared type); in the no-upload cases the literal uri is the original source path.

    :param ctx: active FlyteContext, used for file-access operations
    :param python_val: a FlyteFile, or a string / pathlib.Path pointing at a file
    :param python_type: the declared type; must be FlyteFile (or subclass) or os.PathLike
    :param expected: the expected LiteralType (unused by this implementation)
    :raises TypeTransformerFailedError: for None, a non-path value, or a missing local file (str case)
    :raises ValueError: for a wrong declared type or a pathlib.Path that is not a file
    """
    remote_path = None
    should_upload = True

    if python_val is None:
        raise TypeTransformerFailedError("None value cannot be converted to a file.")

    if not (python_type is os.PathLike or issubclass(python_type, FlyteFile)):
        raise ValueError(f"Incorrect type {python_type}, must be either a FlyteFile or os.PathLike")

    # information used by all cases
    meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type)))

    if isinstance(python_val, FlyteFile):
        source_path = python_val.path

        # If the object has a remote source, then we just convert it back. This means that if someone is just
        # going back and forth between a FlyteFile Python value and a Blob Flyte IDL value, we don't do anything.
        if python_val._remote_source is not None:
            return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=python_val._remote_source)))

        # If the user specified the remote_path to be False, that means no matter what, do not upload. Also if the
        # path given is already a remote path, say https://www.google.com, the concept of uploading to the Flyte
        # blob store doesn't make sense.
        if python_val.remote_path is False or ctx.file_access.is_remote(source_path):
            should_upload = False
        # If the type that's given is a simpler type, we also don't upload, and print a warning too.
        if python_type is os.PathLike:
            logger.warning(
                f"Converting from a FlyteFile Python instance to a Blob Flyte object, but only a {python_type} was"
                f" specified. Since a simpler type was specified, we'll skip uploading!"
            )
            should_upload = False

        # Set the remote destination if one was given instead of triggering a random one below
        remote_path = python_val.remote_path or None

    elif isinstance(python_val, pathlib.Path) or isinstance(python_val, str):
        source_path = str(python_val)
        if issubclass(python_type, FlyteFile):
            if ctx.file_access.is_remote(source_path):
                should_upload = False
            else:
                if isinstance(python_val, pathlib.Path) and not python_val.is_file():
                    raise ValueError(f"Error converting pathlib.Path {python_val} because it's not a file.")
                # If it's a string pointing to a local destination, then make sure it's a file.
                if isinstance(python_val, str):
                    p = pathlib.Path(python_val)
                    if not p.is_file():
                        raise TypeTransformerFailedError(f"Error converting {python_val} because it's not a file.")
        # python_type must be os.PathLike - see check at beginning of function
        else:
            should_upload = False

    else:
        raise TypeTransformerFailedError(f"Expected FlyteFile or os.PathLike object, received {type(python_val)}")

    # If we're uploading something, that means that the uri should always point to the upload destination.
    if should_upload:
        if remote_path is None:
            remote_path = ctx.file_access.get_random_remote_path(source_path)
        ctx.file_access.put_data(source_path, remote_path, is_multipart=False)
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_path)))
    # If not uploading, then we can only take the original source path as the uri.
    else:
        return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=source_path)))