Example #1
0
    def as_bytes(self, data, extension=None):
        """Serialize *data* to bytes as JSON or as a small HTML document.

        Args:
            data: Object to serialize. For the HTML formats a ``str`` is
                taken to be markup already and is passed through unchanged;
                anything else is dumped as JSON and wrapped in ``<pre>``.
            extension: Target file extension (``"json"``, ``"html"`` or
                ``"htm"``). ``None`` selects ``self.default_extension()``.

        Returns:
            Tuple ``(payload_bytes, mimetype)``.

        Raises:
            Exception: If the extension is not one of the supported ones.
        """
        # Function-scope import keeps the block self-contained.
        from html import escape

        if extension is None:
            extension = self.default_extension()

        if extension == "json":
            return json.dumps(data).encode("utf-8"), self.default_mimetype()
        if extension in ("html", "htm"):
            if isinstance(data, str):
                markup = data
            else:
                # The JSON text is embedded as element content, so <, > and &
                # must be escaped or values containing them would be parsed
                # as markup instead of being displayed literally.
                markup = f"<pre>{escape(json.dumps(data), quote=False)}</pre>"
            return markup.encode("utf-8"), mimetype_from_extension("html")
        raise Exception(f"Unsupported file extension: {extension}")
Example #2
0
 def with_filename(self, filename):
     """Store *filename* in the metadata and derive extension and mimetype.

     The name is always written to ``self.metadata["filename"]``. When it
     contains a dot, the lower-cased text after the last dot becomes
     ``self.extension`` and the corresponding mimetype is written to
     ``self.metadata["mimetype"]``. Returns ``self`` so calls can be chained.
     """
     self.metadata["filename"] = filename
     if "." not in filename:
         # No suffix to analyse: extension and mimetype stay as they were.
         return self
     self.extension = filename.rsplit(".", 1)[-1].lower()
     self.metadata["mimetype"] = mimetype_from_extension(self.extension)
     return self
Example #3
0
 def as_bytes(self, data, extension=None):
     """Serialize a model to bytes in the format selected by *extension*.

     Args:
         data: Model object; must satisfy ``self.is_type_of(data)`` and
             provide ``to_json()``, ``to_yaml()`` and ``save(path)``.
         extension: ``"json"``, ``"yaml"``, ``"h5"`` or ``"hdf5"``.
             ``None`` selects ``self.default_extension()``.

     Returns:
         Tuple ``(payload_bytes, mimetype)``.

     Raises:
         Exception: If the extension is not supported.
     """
     if extension is None:
         extension = self.default_extension()
     assert self.is_type_of(data)
     mimetype = mimetype_from_extension(extension)
     if extension == "json":
         return data.to_json().encode("utf-8"), mimetype
     if extension == "yaml":
         return data.to_yaml().encode("utf-8"), mimetype
     if extension in ("h5", "hdf5"):
         # HACK - we need a file name, NamedTemporaryFile implementation does not work in windows
         handle, name = mkstemp(prefix="keras_model_", suffix="." + extension)
         os.close(handle)
         try:
             data.save(name)
             # Close the reader before removal; an open handle blocks
             # os.remove on Windows.
             with open(name, "rb") as stream:
                 payload = stream.read()
         finally:
             # Remove the temporary file even when saving or reading fails.
             os.remove(name)
         return payload, mimetype
     raise Exception(
         f"Serialization: file extension {extension} is not supported by kerasmodel type."
     )
Example #4
0
 def as_bytes(self, data, extension=None):
     """Encode text *data* as UTF-8 and pair it with a mimetype.

     With an explicit *extension* the mimetype is looked up from it, using
     ``"text/plain"`` as the fallback passed to the lookup. Without one,
     the type's own default extension and default mimetype are used.

     Returns:
         Tuple ``(payload_bytes, mimetype)``.
     """
     if extension is not None:
         mime = mimetype_from_extension(extension, "text/plain")
     else:
         extension = self.default_extension()
         mime = self.default_mimetype()
     return data.encode("utf-8"), mime
Example #5
0
    def as_bytes(self, data, extension=None):
        """Serialize a data frame to parquet, csv or feather bytes.

        The frame is collected into record batches, converted to a pyarrow
        table and written to a file in a temporary directory, whose content
        is then read back.

        Args:
            data: Object accepted by ``self.is_type_of``; must provide
                ``collect()``.
            extension: ``"parquet"``, ``"csv"`` or ``"feather"``. ``None``
                selects ``self.default_extension()``.

        Returns:
            Tuple ``(payload_bytes, mimetype)``.

        Raises:
            Exception: If the extension is not supported.
        """
        if extension is None:
            extension = self.default_extension()
        assert self.is_type_of(data)
        mimetype = mimetype_from_extension(extension)

        if extension not in ("parquet", "csv", "feather"):
            raise Exception(
                f"Serialization: file extension {extension} is not supported by DataFusion data-frame type."
            )

        with TemporaryDirectory() as workdir:
            target = Path(workdir) / f"data.{extension}"
            arrow_table = pyarrow.Table.from_batches(data.collect())
            # The writer is resolved only for the requested format.
            if extension == "parquet":
                pyarrow.parquet.write_table(arrow_table, str(target))
            elif extension == "csv":
                pyarrow.csv.write_csv(arrow_table, str(target))
            else:
                pyarrow.feather.write_feather(arrow_table, str(target))
            return target.read_bytes(), mimetype
Example #6
0
    def as_bytes(self, data, extension=None):
        """Serialize a string-keyed mapping as ``djson`` or plain ``json``.

        The ``djson`` layout puts one entry per line: the quoted key followed
        by a colon, left-justified to 20 characters, then the value as
        rendered by ``self.encode_element``. Entries are separated by a
        comma and a newline, and the whole text is enclosed in braces.

        Args:
            data: Mapping to serialize; ``djson`` asserts every key is a str.
            extension: ``"djson"`` or ``"json"``. ``None`` selects
                ``self.default_extension()``.

        Returns:
            Tuple ``(payload_bytes, mimetype)``.

        Raises:
            Exception: If the extension is not supported.
        """
        if extension is None:
            extension = self.default_extension()

        if extension == "json":
            return json.dumps(data).encode("utf-8"), mimetype_from_extension("json")
        if extension != "djson":
            raise Exception(f"Unsupported file extension: {extension}")

        rows = []
        for key, value in data.items():
            assert isinstance(key, str)
            label = f'"{key}":'
            rows.append(f"{label:<20}{self.encode_element(value)!s}")
        text = "{\n" + ",\n".join(rows) + "\n}"
        return text.encode("utf-8"), mimetype_from_extension("djson")
Example #7
0
    def metadata(self):
        """Build the metadata dictionary describing the current state.

        Starts from ``self._metadata.as_dict()`` and overrides or adds the
        entries derived from this object:

        * ``title``: ``self.title``; when that is ``None`` the filename of
          the parsed ``self.raw_query`` is used, or ``""`` if there is no
          query or no filename.
        * ``mimetype``: ``self.mimetype``; when that is ``None`` and a query
          is available it is derived from the query extension
          (``"application/octet-stream"`` when the query has none).
        * ``message``: ``self._metadata.message``; when empty, the message
          of the last ``"log"`` entry, then of the last ``"child_log"``
          entry of ``self._metadata``.

        Returns:
            The resulting dictionary.
        """
        result = self._metadata.as_dict()

        title = self.title
        description = self.description
        if title is None:
            if self.raw_query is None:
                title = ""
            else:
                title = parse(self.raw_query).filename() or ""

        mimetype = self.mimetype
        if mimetype is None and self.query is not None:
            if self.query.extension() is None:
                mimetype = "application/octet-stream"
            else:
                mimetype = mimetype_from_extension(self.query.extension())

        message = self._metadata.message
        # Fall back to the most recent own log entry, then to the most
        # recent child log entry, while no message is available.
        for log_key in ("log", "child_log"):
            if message in (None, ""):
                entries = self._metadata.get(log_key, [])
                if len(entries):
                    message = entries[-1]["message"]

        result.update(
            {
                "status": self.status.value,
                "title": title,
                "description": description,
                "mimetype": mimetype,
                "query": self.raw_query,
                "parent_query": self.parent_query,
                "argument_queries": self.argument_queries,
                "is_error": self.is_error,
                # list-valued entries below are copied with [:]
                "direct_subqueries": self.direct_subqueries[:],
                "progress_indicators": self.progress_indicators[:],
                "child_progress_indicators": self.child_progress_indicators[:],
                "child_log": self.child_log,
                "message": message,
                "started": self.started,
                "updated": self.now(),
                "created": self.created,
                "caching": self.caching,
                "vars": dict(self.vars),
                "html_preview": self.html_preview,
                "side_effect": False,
            }
        )
        return result
Example #8
0
 def as_bytes(self, data, extension=None):
     """Serialize a dataframe to bytes in the format named by *extension*.

     Text formats (csv, tsv, json, html/htm) are written to a text buffer
     and encoded as UTF-8; binary formats (pkl/pickle, parquet, feather,
     xlsx, msgpack) are written to a bytes buffer.

     Args:
         data: Dataframe accepted by ``self.is_type_of``.
         extension: Target format. ``None`` selects
             ``self.default_extension()``.

     Returns:
         Tuple ``(payload_bytes, mimetype)``.

     Raises:
         Exception: If the extension is not supported.
     """
     if extension is None:
         extension = self.default_extension()
     assert self.is_type_of(data)
     mimetype = mimetype_from_extension(extension)

     def as_text(write):
         # Run the writer against a text buffer and encode what it produced.
         buffer = StringIO()
         write(buffer)
         return buffer.getvalue().encode("utf-8"), mimetype

     def via_resilient(write):
         # NOTE(review): ResilientBytesIO presumably stays readable after the
         # writer closes it; really_close() is the explicit release - confirm.
         buffer = ResilientBytesIO()
         write(buffer)
         payload = buffer.getvalue()
         buffer.really_close()
         return payload, mimetype

     if extension == "csv":
         return as_text(lambda buffer: data.to_csv(buffer, index=False))
     if extension == "tsv":
         return as_text(lambda buffer: data.to_csv(buffer, index=False, sep="\t"))
     if extension == "json":
         return as_text(
             lambda buffer: data.to_json(buffer, index=False, orient="table")
         )
     if extension in ("html", "htm"):
         return as_text(lambda buffer: data.to_html(buffer, index=False))
     if extension in ("pkl", "pickle"):
         return via_resilient(lambda buffer: data.to_pickle(buffer, compression=None))
     if extension == "parquet":
         return via_resilient(lambda buffer: data.to_parquet(buffer, engine="pyarrow"))
     if extension == "feather":
         return via_resilient(lambda buffer: data.to_feather(buffer))
     if extension == "xlsx":
         buffer = BytesIO()
         workbook_writer = pd.ExcelWriter(buffer, engine="xlsxwriter")
         data.to_excel(workbook_writer)
         workbook_writer.close()
         return buffer.getvalue(), mimetype
     if extension == "msgpack":
         buffer = BytesIO()
         data.to_msgpack(buffer)
         return buffer.getvalue(), mimetype
     raise Exception(
         f"Serialization: file extension {extension} is not supported by dataframe type."
     )
Example #9
0
 def as_bytes(self, data, extension=None):
     """Serialize *data* as UTF-8 encoded JSON of ``data.to_dict()``.

     Both the ``"idf"`` and ``"json"`` extensions produce the same JSON
     payload and are reported with the JSON mimetype.

     Args:
         data: Object accepted by ``self.is_type_of``; must provide
             ``to_dict()``.
         extension: ``"idf"`` or ``"json"``. ``None`` selects
             ``self.default_extension()``.

     Returns:
         Tuple ``(payload_bytes, mimetype)``.

     Raises:
         Exception: If the extension is not supported.
     """
     if extension is None:
         extension = self.default_extension()
     assert self.is_type_of(data)
     if extension not in ["idf", "json"]:
         raise Exception(
             f"Serialization: file extension {extension} is not supported by stored dataframe iterator type."
         )
     mimetype = mimetype_from_extension("json")
     payload = json.dumps(data.to_dict())
     return payload.encode("utf-8"), mimetype
Example #10
0
    def as_bytes(self, data, extension=None):
        """Serialize a workbook to ``xlsx``/``xltx`` bytes.

        The workbook is saved to a file inside a temporary directory and the
        file content is read back.

        Args:
            data: Workbook accepted by ``self.is_type_of``; must provide
                ``save(path)``.
            extension: ``"xlsx"`` or ``"xltx"``. ``None`` selects
                ``self.default_extension()``.

        Returns:
            Tuple ``(payload_bytes, mimetype)``.

        Raises:
            Exception: If the extension is not supported.
        """
        if extension is None:
            extension = self.default_extension()
        assert self.is_type_of(data)
        mimetype = mimetype_from_extension(extension)

        if extension not in ("xlsx", "xltx"):
            raise Exception(
                f"Serialization: file extension {extension} is not supported by openpyxl_workbook type."
            )

        with TemporaryDirectory() as workdir:
            target = Path(workdir) / f"data.{extension}"
            data.save(str(target))
            return target.read_bytes(), mimetype
Example #11
0
 def as_bytes(self, data, extension=None):
     """Serialize a figure either as a pickle or as a rendered image.

     Args:
         data: Figure accepted by ``self.is_type_of``; must provide
             ``savefig``.
         extension: ``"pkl"``/``"pickle"`` for a pickle dump, or one of
             ``"png"``, ``"svg"``, ``"pdf"``, ``"ps"``, ``"eps"``,
             ``"svgz"`` to render with ``savefig`` at 300 dpi. ``None``
             selects ``self.default_extension()``.

     Returns:
         Tuple ``(payload_bytes, mimetype)``.

     Raises:
         Exception: If the extension is not supported.
     """
     if extension is None:
         extension = self.default_extension()
     assert self.is_type_of(data)
     mimetype = mimetype_from_extension(extension)

     pickled = extension in ("pkl", "pickle")
     if not pickled and extension not in ("png", "svg", "pdf", "ps", "eps", "svgz"):
         raise Exception(
             f"Serialization: file extension {extension} is not supported by Matplotlib Figure type."
         )

     buffer = BytesIO()
     if pickled:
         pickle.dump(data, buffer)
     else:
         data.savefig(buffer, dpi=300, format=extension)
     return buffer.getvalue(), mimetype
Example #12
0
 def as_bytes(self, data, extension=None):
     """Serialize a dataset to UTF-8 encoded csv or json bytes.

     The pieces produced by the dataset's ``gen_csv`` / ``gen_json``
     generator (headers and tags included) are concatenated and encoded.

     Args:
         data: Dataset accepted by ``self.is_type_of``.
         extension: ``"csv"`` or ``"json"``. ``None`` selects
             ``self.default_extension()``.

     Returns:
         Tuple ``(payload_bytes, mimetype)``.

     Raises:
         Exception: If the extension is not supported.
     """
     if extension is None:
         extension = self.default_extension()
     assert self.is_type_of(data)
     mimetype = mimetype_from_extension(extension)

     if extension == "csv":
         pieces = data.gen_csv(show_headers=True, show_tags=True)
     elif extension == "json":
         pieces = data.gen_json(show_headers=True, show_tags=True)
     else:
         raise Exception(
             f"Serialization: file extension {extension} is not supported by HXL dataset type."
         )
     return "".join(pieces).encode("utf-8"), mimetype
Example #13
0
    def as_bytes(self, data, extension=None):
        """Serialize a data frame to csv or parquet bytes.

        Args:
            data: Data frame accepted by ``self.is_type_of``; must provide
                ``to_csv`` and ``to_parquet`` accepting a binary buffer.
            extension: ``"csv"`` or ``"parquet"``. ``None`` selects
                ``self.default_extension()``.

        Returns:
            Tuple ``(payload_bytes, mimetype)``.

        Raises:
            Exception: If the extension is not supported.
        """
        if extension is None:
            extension = self.default_extension()
        assert self.is_type_of(data)
        mimetype = mimetype_from_extension(extension)

        if extension not in ("csv", "parquet"):
            raise Exception(
                f"Serialization: file extension {extension} is not supported by polars data-frame type."
            )

        buffer = BytesIO()
        if extension == "csv":
            data.to_csv(buffer)
        else:
            data.to_parquet(buffer)
        return buffer.getvalue(), mimetype
Example #14
0
 def as_bytes(self, data, extension=None):
     """Serialize an image to bytes in the format mapped to *extension*.

     ``self.format_from_extension`` supplies the format name together with
     flags telling whether the format can be read and written.

     Args:
         data: Image accepted by ``self.is_type_of``; must provide
             ``save(buffer, format=...)``.
         extension: Target file extension. ``None`` selects
             ``self.default_extension()``.

     Returns:
         Tuple ``(payload_bytes, mimetype)``.

     Raises:
         Exception: If the format is read-only or not supported at all.
     """
     if extension is None:
         extension = self.default_extension()
     assert self.is_type_of(data)
     mimetype = mimetype_from_extension(extension)
     format_name, can_read, can_write = self.format_from_extension(extension)

     if not can_write:
         if can_read:
             raise Exception(
                 f"Serialization: PIL Image only supports reading, but not writing for file extension {extension}."
             )
         raise Exception(
             f"Serialization: file extension {extension} is not supported by PIL Image."
         )

     buffer = BytesIO()
     data.save(buffer, format=format_name)
     return buffer.getvalue(), mimetype
Example #15
0
 def as_bytes(self, data, extension=None):
     """Serialize a model to json, yaml or h5/hdf5 bytes.

     The json and yaml forms are the UTF-8 encoded output of the model's
     ``to_json()`` / ``to_yaml()``. The h5/hdf5 form is produced by saving
     the model to a file in a temporary directory and reading it back.

     Args:
         data: Model accepted by ``self.is_type_of``.
         extension: ``"json"``, ``"yaml"``, ``"h5"`` or ``"hdf5"``.
             ``None`` selects ``self.default_extension()``.

     Returns:
         Tuple ``(payload_bytes, mimetype)``.

     Raises:
         Exception: If the extension is not supported.
     """
     if extension is None:
         extension = self.default_extension()
     assert self.is_type_of(data)
     mimetype = mimetype_from_extension(extension)

     if extension in ("json", "yaml"):
         text = StringIO()
         text.write(data.to_json() if extension == "json" else data.to_yaml())
         return text.getvalue().encode("utf-8"), mimetype

     if extension not in ("h5", "hdf5"):
         raise Exception(
             f"Serialization: file extension {extension} is not supported by kerasmodel type."
         )

     with TemporaryDirectory() as workdir:
         target = Path(workdir) / f"data.{extension}"
         data.save(str(target))
         return target.read_bytes(), mimetype
Example #16
0
 def as_bytes(self, data, extension=None):
     """Return *data* unchanged together with the mimetype for *extension*.

     No conversion is applied to *data*. *extension* is passed to the
     mimetype lookup as given, including ``None``.
     """
     mimetype = mimetype_from_extension(extension)
     return data, mimetype
Example #17
0
 def mimetype(self):
     """Return the mime type of the data.

     The ``"mimetype"`` metadata entry is used when present; otherwise the
     mime type derived from ``self.extension`` is returned.
     """
     # The fallback is computed up front, whether or not it ends up used.
     fallback = mimetype_from_extension(self.extension)
     return self.metadata.get("mimetype", fallback)