def _serialize_data(data: Any) -> np.ndarray:
    """
    Pack ``data`` into a one-dimensional ``uint8`` array.

    ``data`` is passed through ``serialize`` with the "msgpack-ext" encoding and
    the resulting buffer is reinterpreted byte-for-byte as ``uint8``.

    Parameters
    ----------
    data: Any
        Object to serialize.

    Returns
    -------
    np.ndarray
        ``uint8`` array over the serialized payload. ``np.frombuffer`` does not
        copy, so the array is read-only when the payload is an immutable
        ``bytes`` object; call ``.copy()`` if a writable array is needed.
    """
    # h5py v3 will support bytes,
    # but for now the workaround is variable-length np uint8.
    # np.frombuffer replaces the binary mode of np.fromstring, which NumPy
    # has deprecated since 1.14.
    return np.frombuffer(serialize(data, "msgpack-ext"), dtype="uint8")
Beispiel #2
0
    def handle_request(self, collection_id: int, request: str, model: Dict[str, Any]) -> Dict[str, Any]:
        """
        Serve a view query. This function backs the GET endpoint
        /collections/[collection_id]/view/[request]

        Parameters
        ----------
        collection_id: int
            Id of the collection whose view is queried.
        request: str
            Kind of data requested. Each option maps onto one DatasetView method:
            - list: list_values
            - value: get_values
            - molecule: get_molecules
            - entry: get_entries
        model:
            REST model holding the input options. The keys read here are
            "subset" (entry, value), "indexes" (molecule) and "queries" (value).

        Returns
        -------
        Dict[str, Any]:
            Dictionary with a "meta" block and a "data" payload. On failure
            "data" is None and meta["error_description"] says why. On success
            "data" is the feather-serialized dataframe or, for "value"
            requests, a dictionary holding that dataframe under "values"
            together with "units".
        """
        meta = {"errors": [], "success": False, "error_description": False, "msgpacked_cols": []}

        def failure(description: str) -> Dict[str, Any]:
            # Every error path answers with the same response shape.
            meta["success"] = False
            meta["error_description"] = description
            return {"meta": meta, "data": None}

        try:
            view = self._get_view(collection_id)
        except IOError:
            return failure(f"View not available for collection #{collection_id}")

        units = None  # only filled in by "value" requests
        if request == "entry":
            try:
                df = view.get_entries(subset=model["subset"])
            except KeyError:
                return failure("Unable to find requested entry.")
        elif request == "molecule":
            molecules = view.get_molecules(model["indexes"], keep_serialized=True)
            df = pd.DataFrame({"molecule": molecules})
            df.reset_index(inplace=True)
            # Molecules are requested with keep_serialized=True, so the column
            # is reported as msgpacked without being packed again below.
            meta["msgpacked_cols"].append("molecule")
        elif request == "value":
            df, units = view.get_values(model["queries"], subset=model["subset"])
            df.reset_index(inplace=True)
        elif request == "list":
            df = view.list_values()
        else:
            return failure(f"Unknown view request: {request}.")

        # msgpack the columns pyarrow does not support. The first row serves
        # as the sample that decides whether a column needs packing.
        # Add any other datatypes that need special handling to this tuple.
        packable_types = (np.ndarray, list)
        if len(df) > 0:
            to_pack = [name for name in df.columns if isinstance(df[name].iloc[0], packable_types)]
        else:
            to_pack = []

        for name in to_pack:
            df[name] = df[name].apply(lambda cell: serialize(cell, "msgpack-ext"))
        meta["msgpacked_cols"].extend(to_pack)

        # Serialize the dataframe to feather in memory.
        with io.BytesIO() as buffer:
            df.to_feather(buffer)
            feather_bytes = buffer.getvalue()

        payload = {"values": feather_bytes, "units": units} if request == "value" else feather_bytes

        meta["success"] = True

        return {"meta": meta, "data": payload}
 def _serialize_field(field: Any) -> str:
     """Return ``field`` encoded by ``serialize`` with the "json" encoding."""
     encoded = serialize(field, "json")
     return encoded
Beispiel #4
0
 def _serialize_data(data: Any) -> np.ndarray:
     """
     Pack ``data`` into a one-dimensional ``uint8`` array.

     ``data`` is run through ``serialize`` with the "msgpack-ext" encoding and
     the resulting buffer is viewed byte-for-byte as ``uint8``.
     """
     # h5py v3 will support bytes directly; until then the workaround is to
     # store the payload as variable-length numpy uint8.
     packed = serialize(data, "msgpack-ext")
     return np.frombuffer(packed, dtype=np.uint8)