Esempio n. 1
0
    def _default_and_validate_layouts(self):
        """Resolve ``self.config['layout']`` to a validated, capped list.

        Steps:
            a) if no layouts were specified (``None`` or empty), default to
               every string ``obsm`` key starting with ``X_``, with that
               prefix stripped
            b) drop, with a warning, any layout whose ``X_<name>`` entry is
               absent from ``obsm`` or is rejected by ``_is_valid_layout``
            c) cap the surviving list at the global const MAX_LAYOUTS

        The resulting list is written back to ``self.config['layout']``.

        :raises PrepareError: if defaulting finds no ``X_`` keys, or if no
            layout survives validation.
        """
        layouts = self.config['layout']

        # Fetch the keys once; they drive both defaulting and validation.
        obsm_keys = self.data.obsm_keys()

        # handle default
        if layouts is None or len(layouts) == 0:
            # load default layouts from the data.
            layouts = [
                key[2:]
                for key in obsm_keys
                if isinstance(key, str) and key.startswith("X_")
            ]
            if len(layouts) == 0:
                raise PrepareError("Unable to find any precomputed layouts within the dataset.")

        # remove invalid layouts, warning about each one that is dropped
        valid_layouts = []
        for layout in layouts:
            layout_name = f"X_{layout}"
            if layout_name not in obsm_keys:
                warnings.warn(f"Ignoring unknown layout name: {layout}.")
            elif not self._is_valid_layout(self.data.obsm[layout_name]):
                warnings.warn(f"Ignoring layout due to malformed shape or data type: {layout}")
            else:
                valid_layouts.append(layout)

        if len(valid_layouts) == 0:
            raise PrepareError("No valid layout data.")

        # cap layouts to MAX_LAYOUTS
        self.config['layout'] = valid_layouts[0:MAX_LAYOUTS]
Esempio n. 2
0
 def layout(self, filter, interactive_limit=None):
     """
     Return the precomputed n-d layout for the filtered cells, normalized.

     The layout is read from ``obsm["X_<layout_method>"]`` of the filtered
     dataframe; nothing is recomputed here. Values are rescaled with
     ``(v - min) / (max - min)`` where ``min()``/``max()`` are called
     without an axis argument.
     NOTE(review): presumably the layout is a numpy array, making these the
     global extrema across all dimensions -- confirm.

     :param filter: filter: dictionary with filter params
     :param interactive_limit: -- if truthy, refuse when the filtered
         dataframe has more observations than this
     :return: dict with "ndims" (number of layout columns) and
         "coordinates" (list of records ``(cellid, x, y, ...)``)
     :raises FilterError: if the filter cannot be applied
         (KeyError / IndexError from ``filter_dataframe``)
     :raises InteractiveError: if the filtered data exceeds
         ``interactive_limit``
     :raises PrepareError: if no layout is stored for ``layout_method``
     """
     try:
         df = self.filter_dataframe(filter)
     except (KeyError, IndexError) as e:
         raise FilterError(f"Error parsing filter: {e}") from e
     if interactive_limit and len(df.obs.index) > interactive_limit:
         raise InteractiveError(
             "Size data is too large for interactive computation")
     # TODO Filtering cells is fine, but filtering genes does nothing because the neighbors are
     # calculated using the original vars (geneset) and this doesn't get updated when you use less.
     # Need to recalculate neighbors (long) if user requests new layout filtered by var
     # TODO for MVP we are pushing computation of layout to preprocessing and not allowing re-layout
     # this will probably change after user feedback
     layout_key = f"X_{self.layout_method}"
     try:
         df_layout = df.obsm[layout_key]
     except (KeyError, ValueError) as e:
         # A missing entry surfaces as ValueError from a structured-array
         # field lookup, or as KeyError from a mapping lookup; either way
         # the layout has not been prepared.
         raise PrepareError(
             f"Layout has not been calculated using {self.layout_method}, "
             f"please prepare your datafile and relaunch cellxgene") from e
     lowest = df_layout.min()
     highest = df_layout.max()
     normalized_layout = DataFrame((df_layout - lowest) / (highest - lowest),
                                   index=df.obs.index)
     return {
         "ndims": normalized_layout.shape[1],
         "coordinates": normalized_layout.to_records(index=True).tolist()
     }
Esempio n. 3
0
 def _validate_data_calculations(self):
     layout_key = f"X_{self.layout_method}"
     try:
         assert layout_key in self.data.obsm_keys()
     except AssertionError:
         raise PrepareError(
             f"Cannot find a field with coordinates for the {self.layout_method} layout requested. A different"
             f" layout may have been computed. The requested layout must be pre-calculated and saved "
             f"back in the h5ad file. You can run "
             f"`cellxgene prepare --layout {self.layout_method} <datafile>` "
             f"to solve this problem. ")
Esempio n. 4
0
    def layout_to_fbs_matrix(self):
        """
        Return the default layout for cells as a FBS Matrix.

        The layout is read from ``self.data.obsm["X_<layout_method>"]``,
        rescaled with ``(v - min) / (max - min)``, cast to float32 and
        passed to ``encode_matrix_fbs`` with no row or column index.
        NOTE(review): ``min()``/``max()`` are called without an axis;
        presumably the layout is a numpy array, making these the global
        extrema across all dimensions -- confirm.

        Caveats:
        * does not support filtering
        * only returns Matrix in columnar layout

        :raises PrepareError: if no layout is stored for ``layout_method``
        """
        layout_key = f"X_{self.layout_method}"
        try:
            df_layout = self.data.obsm[layout_key]
        except (KeyError, ValueError) as e:
            # A missing entry surfaces as ValueError from a structured-array
            # field lookup, or as KeyError from a mapping lookup; either way
            # the layout has not been prepared.
            raise PrepareError(
                f"Layout has not been calculated using {self.layout_method}, "
                f"please prepare your datafile and relaunch cellxgene") from e
        lowest = df_layout.min()
        highest = df_layout.max()
        normalized_layout = (df_layout - lowest) / (highest - lowest)
        return encode_matrix_fbs(normalized_layout.astype(dtype=np.float32),
                                 col_idx=None,
                                 row_idx=None)
Esempio n. 5
0
    def layout_to_fbs_matrix(self):
        """
        Return the configured 2-D layouts for cells as a single FBS Matrix.

        For every name in ``self.config["layout"]`` the first two columns of
        ``self.data.obsm["X_<name>"]`` are taken, normalized, and emitted as
        columns ``<name>_0`` and ``<name>_1``. All layouts are concatenated
        column-wise before encoding.

        Caveats:
        * does not support filtering
        * only returns Matrix in columnar layout

        All embeddings must be individually centered & scaled (isotropically)
        to a [0, 1] range.

        :raises PrepareError: if a layout lookup or its conversion fails
            with KeyError or ValueError
        """
        # Read the config before the try block so that a missing config
        # entry is not reported as a missing layout.
        layouts = self.config["layout"]
        layout_data = []
        try:
            for layout in layouts:
                full_embedding = self.data.obsm[f"X_{layout}"]
                embedding = full_embedding[:, :2]

                # scale isotropically: one factor (the larger per-axis
                # extent) is applied to both axes so the aspect ratio is kept
                lower = embedding.min(axis=0)
                upper = embedding.max(axis=0)
                extent = upper - lower
                # NOTE(review): if every point coincides, scale is 0 and the
                # division yields non-finite values -- confirm that upstream
                # validation rules this out.
                scale = np.amax(extent)
                normalized_layout = (embedding - lower) / scale

                # translate to center on both axis
                translate = 0.5 - (extent / scale / 2)
                normalized_layout = normalized_layout + translate

                normalized_layout = normalized_layout.astype(dtype=np.float32)
                layout_data.append(
                    pandas.DataFrame(normalized_layout,
                                     columns=[f"{layout}_0", f"{layout}_1"]))

        except (KeyError, ValueError) as e:
            # A missing obsm entry surfaces as ValueError from a
            # structured-array field lookup, or as KeyError from a mapping
            # lookup.
            raise PrepareError(
                f"Layout has not been calculated using {layouts}, "
                f"please prepare your datafile and relaunch cellxgene") from e

        df = pandas.concat(layout_data, axis=1, copy=False)
        return encode_matrix_fbs(df, col_idx=df.columns, row_idx=None)