def annotation_to_fbs_matrix(self, axis, fields=None, labels=None):
    """
    return the annotations for `axis` as a flatbuffer, using the cellxgene
    matrix fbs encoding.

    * axis: the axis to read; its string form names the array that is opened.
    * fields: optional field selection, passed to _annotations_field_split
      (which may raise if it contains an unknown key).
    * labels: optional dataframe of additional annotations. It is joined in
      only when axis is Axis.OBS and the dataframe is not empty.
    """
    with ServerTiming.time(f"annotations.{axis}.query"):
        array = self.open_array(str(axis))

        # may raise if fields contains unknown key
        split = self._annotations_field_split(axis, fields, array, labels)
        cxg_fields, anno_fields, return_fields, index_field = split

        # None selects every attribute, a non-empty selection is queried by
        # name, and an empty selection reads nothing from the array.
        if cxg_fields is None:
            raw = array[:]
        elif cxg_fields:
            raw = array.query(attrs=cxg_fields)[:]
        else:
            raw = {}
        frame = pd.DataFrame.from_dict(raw)

        use_labels = axis == Axis.OBS and labels is not None and not labels.empty
        if use_labels and (anno_fields is None or anno_fields):
            assert index_field
            # None joins the whole label frame; otherwise only the named columns.
            extra = labels if anno_fields is None else labels[anno_fields]
            frame = frame.join(extra, index_field)

        if return_fields:
            frame = frame[return_fields]

    with ServerTiming.time(f"annotations.{axis}.encode"):
        encoded = encode_matrix_fbs(frame, col_idx=frame.columns)

    return encoded
def layout_to_fbs_matrix(self, fields):
    """
    return the requested embeddings as a flatbuffer, using the cellxgene
    matrix fbs encoding.

    * fields: embedding names to return; when None or empty, every name
      reported by get_embedding_names() is used.
    * returns only first two dimensions, with name {ename}_0 and {ename}_1,
      where {ename} is the embedding name.
    * client assumes each will be individually centered & scaled
      (isotropically) to a [0, 1] range.
    * does not support filtering
    """
    if fields is None or len(fields) == 0:
        names = self.get_embedding_names()
    else:
        names = fields

    with ServerTiming.time("layout.query"):
        frames = [
            pd.DataFrame(
                DataAdaptor.normalize_embedding(self.get_embedding_array(ename, 2)),
                columns=[f"{ename}_0", f"{ename}_1"],
            )
            for ename in names
        ]

    with ServerTiming.time("layout.encode"):
        # With nothing to concatenate, encode an empty frame instead.
        df = pd.concat(frames, axis=1, copy=False) if frames else pd.DataFrame()
        encoded = encode_matrix_fbs(df, col_idx=df.columns, row_idx=None)

    return encoded
def get_embedding_names(self):
    """
    return the names of the embeddings found under the "emb" path.

    Each name is the basename of an entry that lsuri() reports with type
    "array". Raises DatasetAccessError if no such entry exists.
    """
    with ServerTiming.time("layout.lsuri"):
        emb_root = self.get_path("emb")
        names = []
        for path, kind in self.lsuri(emb_root):
            # entries of any other type are not embeddings
            if kind == "array":
                names.append(os.path.basename(path))

    if not names:
        raise DatasetAccessError("cxg matrix missing embeddings")
    return names
def compute_embedding(self, method, obsFilter):
    """
    compute a new embedding over the observations selected by obsFilter and
    return it as a (schema, fbs) tuple.

    * method: re-embedding method; only "umap" is available.
    * obsFilter: filter dict; its "obs" entry is converted to an observation
      mask. It may not contain an Axis.VAR entry.
    * returns (schema, fbs), where schema is a dict with keys "name", "type"
      and "dims", and fbs is the normalized layout encoded with the
      cellxgene matrix fbs encoding, one column per entry in dims.

    Raises FilterError if the filter contains variable conditions or cannot
    be parsed, and NotImplementedError for any method other than "umap".
    """
    if Axis.VAR in obsFilter:
        raise FilterError("Observation filters may not contain variable conditions")
    if method != "umap":
        raise NotImplementedError(f"re-embedding method {method} is not available.")

    try:
        shape = self.get_shape()
        obs_mask = self._axis_filter_to_mask(Axis.OBS, obsFilter["obs"], shape[0])
    except (KeyError, IndexError) as err:
        # Chain the original error so the root cause stays in the traceback.
        raise FilterError("Error parsing filter") from err

    with ServerTiming.time("layout.compute"):
        X_umap = scanpy_umap(self.data, obs_mask)
        normalized_layout = DataAdaptor.normalize_embedding(X_umap)

    # Server picks re-embedding name, which must not collide with any other
    # embedding name generated by this backend.
    name = f"reembed:{method}_{datetime.now().isoformat(timespec='milliseconds')}"
    dims = [f"{name}_0", f"{name}_1"]
    df = pd.DataFrame(normalized_layout, columns=dims)
    fbs = encode_matrix_fbs(df, col_idx=df.columns, row_idx=None)
    schema = {"name": name, "type": "float32", "dims": dims}
    return (schema, fbs)
def compute_embedding(self, method, obsFilter):
    """
    compute a new embedding over the observations selected by obsFilter,
    store it on this adaptor, and return its layout schema.

    * method: re-embedding method; only "umap" is available.
    * obsFilter: filter dict; its "obs" entry is converted to an observation
      mask. It may not contain an Axis.VAR entry.
    * returns the layout schema, a dict with keys "name", "type" and "dims".

    Side effects: the returned schema is appended to
    self.schema["layout"]["obs"], and the computed embedding is stored in
    self.data.obsm under the key "X_{name}".

    Raises FilterError if the filter contains variable conditions or cannot
    be parsed, and NotImplementedError for any method other than "umap".
    """
    if Axis.VAR in obsFilter:
        raise FilterError("Observation filters may not contain variable conditions")
    if method != "umap":
        raise NotImplementedError(f"re-embedding method {method} is not available.")

    try:
        shape = self.get_shape()
        obs_mask = self._axis_filter_to_mask(Axis.OBS, obsFilter["obs"], shape[0])
    except (KeyError, IndexError) as err:
        # Chain the original error so the root cause stays in the traceback.
        raise FilterError("Error parsing filter") from err

    with ServerTiming.time("layout.compute"):
        X_umap = scanpy_umap(self.data, obs_mask)

    # Server picks re-embedding name, which must not collide with any other
    # embedding name generated by this backend.
    name = f"reembed:{method}_{datetime.now().isoformat(timespec='milliseconds')}"
    dims = [f"{name}_0", f"{name}_1"]
    layout_schema = {"name": name, "type": "float32", "dims": dims}

    # Register the new embedding on the adaptor: schema first, then the data.
    self.schema["layout"]["obs"].append(layout_schema)
    self.data.obsm[f"X_{name}"] = X_umap
    return layout_schema