def _get_data_from_filepath(self, filepath_or_buffer):
    """
    Normalise the ``read_json`` input before parsing.

    ``read_json`` accepts three kinds of input:

    1. a file path (string-like)
    2. a file-like object (e.g. an open file object or ``StringIO``)
    3. a JSON string

    Anything that is not a plain string, and any string that is a URL, an
    fsspec URL or the path of an existing file, is opened through
    ``get_handle``; the resulting handles object is stored on
    ``self.handles`` and its ``handle`` attribute is returned. Every other
    string is assumed to be literal JSON and is returned as-is.
    """
    source = stringify_path(filepath_or_buffer)

    # A string that is neither a URL nor an existing file might be JSON text,
    # so it is handed back untouched and no handle is opened.
    is_inline_json = (
        isinstance(source, str)
        and not is_url(source)
        and not is_fsspec_url(source)
        and not file_exists(source)
    )
    if is_inline_json:
        return source

    self.handles = get_handle(
        source,
        "r",
        encoding=self.encoding,
        compression=self.compression,
        storage_options=self.storage_options,
        errors=self.encoding_errors,
    )
    return self.handles.handle
def _read(
    obj: bytes | FilePath | ReadBuffer[str] | ReadBuffer[bytes], encoding: str | None
) -> str | bytes:
    """
    Try to read from a url, file or string.

    Parameters
    ----------
    obj : str, bytes, path object, or file-like object
        A URL, anything exposing ``read``, or the path of an existing file
        is opened with ``get_handle`` and read in full. Any other ``str`` or
        ``bytes`` value is treated as the raw text itself.
    encoding : str or None
        Encoding forwarded to ``get_handle`` when ``obj`` has to be opened.

    Returns
    -------
    raw_text : str or bytes

    Raises
    ------
    TypeError
        If ``obj`` is neither readable nor a ``str``/``bytes`` instance.
    """
    needs_handle = (
        is_url(obj)
        or hasattr(obj, "read")
        or (isinstance(obj, str) and file_exists(obj))
    )
    if needs_handle:
        # mypy: ``obj`` may be ``bytes`` as far as the checker can tell, which
        # ``get_handle`` does not accept; the condition above rules out raw
        # bytes at runtime, hence the ignore below.
        with get_handle(
            obj, "r", encoding=encoding  # type: ignore[arg-type]
        ) as handles:
            return handles.handle.read()

    if isinstance(obj, (str, bytes)):
        return obj

    raise TypeError(f"Cannot read object of type '{type(obj).__name__}'")
def _get_data_from_filepath(self, filepath_or_buffer):
    """
    Normalise the ``read_json`` input before parsing.

    ``read_json`` accepts three kinds of input:

    1. a file path (string-like)
    2. a file-like object (e.g. an open file object or ``StringIO``)
    3. a JSON string

    Anything that is not a plain string, and any string that is a URL, an
    fsspec URL or the path of an existing file, is opened through
    ``get_handle``; the resulting handles object is stored on
    ``self.handles`` and its ``handle`` attribute is returned. Every other
    string is assumed to be literal JSON and is returned as-is.

    Raises
    ------
    FileNotFoundError
        If the input is a string ending (case-insensitively) in ``.json`` or
        ``.json`` followed by a known compression extension (``.json.gz``,
        ``.json.bz2``, ...) but no such file exists.
    """
    filepath_or_buffer = stringify_path(filepath_or_buffer)

    if (
        not isinstance(filepath_or_buffer, str)
        or is_url(filepath_or_buffer)
        or is_fsspec_url(filepath_or_buffer)
        or file_exists(filepath_or_buffer)
    ):
        self.handles = get_handle(
            filepath_or_buffer,
            "r",
            encoding=self.encoding,
            compression=self.compression,
            storage_options=self.storage_options,
            errors=self.encoding_errors,
        )
        return self.handles.handle

    # Falling through means the value is a plain string that is neither a URL
    # nor an existing file, so neither fact is checked again. A second
    # ``file_exists`` call would only add a filesystem hit and a window in
    # which the two checks could disagree.
    json_suffixes = (".json",) + tuple(
        f".json{ext}" for ext in _extension_to_compression
    )
    if filepath_or_buffer.lower().endswith(json_suffixes):
        # Looks like a JSON file name rather than JSON text: fail loudly
        # instead of handing a path to the JSON parser.
        raise FileNotFoundError(f"File {filepath_or_buffer} does not exist")

    # Otherwise it might be a JSON string; hand it back untouched.
    return filepath_or_buffer
def get_data_from_filepath(
    filepath_or_buffer: FilePath | bytes | ReadBuffer[bytes] | ReadBuffer[str],
    encoding,
    compression: CompressionOptions,
    storage_options: StorageOptions,
) -> str | bytes | ReadBuffer[bytes] | ReadBuffer[str]:
    """
    Extract raw XML data.

    The method accepts three input types:

    1. filepath (string-like)
    2. file-like object (e.g. open file object, StringIO)
    3. XML string or bytes

    A string that does not start with ``<`` and that is a URL, an fsspec URL
    or the path of an existing file is opened with ``get_handle`` and its
    full content is returned. Input types (2) and (3), and strings that
    match none of the above, are returned without being read (non-bytes
    input is first passed through ``stringify_path``).

    Parameters
    ----------
    filepath_or_buffer : str, path object, bytes, or file-like object
        The XML source.
    encoding
        Encoding forwarded to ``get_handle``.
    compression : CompressionOptions
        Compression setting forwarded to ``get_handle``.
    storage_options : StorageOptions
        Storage options forwarded to ``get_handle``.

    Returns
    -------
    str, bytes, or file-like object
        The content read from the path/URL, or the input itself.
    """
    if not isinstance(filepath_or_buffer, bytes):
        filepath_or_buffer = stringify_path(filepath_or_buffer)

    # Only strings can name something to open, and a string starting with
    # "<" is treated as XML markup rather than a location.
    if (
        isinstance(filepath_or_buffer, str)
        and not filepath_or_buffer.startswith(("<?xml", "<"))
        and (
            is_url(filepath_or_buffer)
            or is_fsspec_url(filepath_or_buffer)
            or file_exists(filepath_or_buffer)
        )
    ):
        with get_handle(
            filepath_or_buffer,
            "r",
            encoding=encoding,
            compression=compression,
            storage_options=storage_options,
        ) as handle_obj:
            filepath_or_buffer = (
                # mypy: the value read back (str, or the handle itself) does
                # not match the declared type of the parameter being reused.
                handle_obj.handle.read()  # type: ignore[assignment]
                if hasattr(handle_obj.handle, "read")
                else handle_obj.handle
            )

    return filepath_or_buffer
def get_data_from_filepath(
    filepath_or_buffer,
    encoding,
    compression,
    storage_options,
) -> Union[str, bytes, Buffer]:
    """
    Extract raw XML data.

    The method accepts three input types:

    1. filepath (string-like)
    2. file-like object (e.g. open file object, StringIO)
    3. XML string or bytes

    A string that does not start with ``<`` and that is a URL, an fsspec URL
    or the path of an existing file is opened with ``get_handle`` and its
    full content is returned. Input types (2) and (3), and strings that
    match none of the above, are returned without being read (after being
    passed through ``stringify_path``).

    Parameters
    ----------
    filepath_or_buffer : str, path object, bytes, or file-like object
        The XML source.
    encoding
        Encoding forwarded to ``get_handle``.
    compression
        Compression setting forwarded to ``get_handle``.
    storage_options
        Storage options forwarded to ``get_handle``.

    Returns
    -------
    str, bytes, or file-like object
        The content read from the path/URL, or the input itself.
    """
    filepath_or_buffer = stringify_path(filepath_or_buffer)

    # Only strings can name something to open, and a string starting with
    # "<" is treated as XML markup rather than a location.
    if (
        isinstance(filepath_or_buffer, str)
        and not filepath_or_buffer.startswith(("<?xml", "<"))
        and (
            is_url(filepath_or_buffer)
            or is_fsspec_url(filepath_or_buffer)
            or file_exists(filepath_or_buffer)
        )
    ):
        with get_handle(
            filepath_or_buffer,
            "r",
            encoding=encoding,
            compression=compression,
            storage_options=storage_options,
        ) as handle_obj:
            filepath_or_buffer = (
                handle_obj.handle.read()
                if hasattr(handle_obj.handle, "read")
                else handle_obj.handle
            )

    return filepath_or_buffer