Exemple #1
0
    def open_input_file(self, path):
        """Open ``path`` for reading and wrap the handle in a ``PythonFile``.

        The file is opened through ``self.fs`` in binary read mode (``"rb"``)
        and the resulting handle is wrapped in a read-mode ``PythonFile``.

        Raises:
            FileNotFoundError: if ``self.fs.isfile(path)`` is false.
        """
        from pyarrow import PythonFile

        if self.fs.isfile(path):
            raw_handle = self.fs.open(path, mode="rb")
            return PythonFile(raw_handle, mode="r")

        # Not a regular file according to the filesystem: report the path.
        raise FileNotFoundError(path)
Exemple #2
0
    def write_parquet(self, table: Table, object_uri: str,
                      metadata: dict[str, str]):
        """Serialize ``table`` as Parquet in memory and upload it.

        The table is written into an in-memory ``BytesIO`` buffer, which is
        then passed as ``Body`` to ``put`` on the object returned by
        ``self._object_from_uri(object_uri)``. An info-level log record is
        emitted before the upload starts and after it completes.

        Args:
            table: The table handed to ``parquet.write_table``.
            object_uri: URI identifying the upload target; also included in
                both log records.
            metadata: Passed through unchanged as ``Metadata`` to ``put``.
        """

        def log_context(event_name):
            # Structured fields attached to each log record via ``extra``.
            return {"event": event_name, "object_uri": object_uri}

        logger.info(
            f"Attempting to upload: {object_uri}",
            extra=log_context("ATTEMPTING_UPLOAD_PARQUET_TO_S3"),
        )

        target = self._object_from_uri(object_uri)

        payload = BytesIO()
        parquet.write_table(table, PythonFile(payload))
        # Rewind so the upload reads the serialized bytes from the start.
        payload.seek(0)

        target.put(Body=payload, Metadata=metadata)

        logger.info(
            f"Successfully uploaded to: {object_uri}",
            extra=log_context("SUCCESSFULLY_UPLOADED_PARQUET_TO_S3"),
        )
Exemple #3
0
    def open_input_file(self, path):
        """Open ``path`` through a ``CachedFile`` keyed by its full URL.

        The cache key is ``f'{self.scheme}://{path}'``. On a cache miss a new
        ``CachedFile`` is created and stored in ``self._file_cache``. On a hit
        a new ``CachedFile`` is created that reuses the ``data_file`` and
        ``mask_file`` of the stored entry; the stored entry itself is not
        replaced.

        Returns:
            The ``CachedFile`` itself when ``self.for_arrow`` is falsy;
            otherwise a read-mode ``PythonFile`` wrapping a
            ``vaex.file.FileProxy`` around it.
        """
        from pyarrow import PythonFile

        def open_uncached():
            # Deferred opener handed to CachedFile.
            return self.fs.open_input_file(path)

        full_path = f'{self.scheme}://{path}'
        # TODO: we may want to cache the mmapped file
        cached_file_kwargs = {}
        is_cached = full_path in self._file_cache
        if is_cached:
            # Share the backing files of the entry created earlier.
            earlier = self._file_cache[full_path]
            cached_file_kwargs["data_file"] = earlier.data_file
            cached_file_kwargs["mask_file"] = earlier.mask_file
        cached_file_kwargs["read_as_buffer"] = not self.for_arrow

        cached_file = CachedFile(open_uncached, full_path, **cached_file_kwargs)
        if not is_cached:
            self._file_cache[full_path] = cached_file

        if self.for_arrow:
            proxy = vaex.file.FileProxy(cached_file, full_path, None)
            return PythonFile(proxy, mode="r")
        return cached_file
Exemple #4
0
    def open_append_stream(self, path):
        """Open ``path`` for appending and wrap it in a ``PythonFile``.

        The file is opened through ``self.fs`` in binary append mode
        (``"ab"``) and wrapped in a write-mode ``PythonFile``.
        """
        from pyarrow import PythonFile

        append_handle = self.fs.open(path, mode="ab")
        return PythonFile(append_handle, mode="w")
Exemple #5
0
    def open_output_stream(self, path, metadata):
        """Open ``path`` for writing and wrap it in a ``PythonFile``.

        The file is opened through ``self.fs`` in binary write mode
        (``"wb"``) and wrapped in a write-mode ``PythonFile``.

        Note: ``metadata`` is accepted but not used by this implementation.
        """
        from pyarrow import PythonFile

        output_handle = self.fs.open(path, mode="wb")
        return PythonFile(output_handle, mode="w")