Beispiel #1
0
    def _load_data(self, data_locator):
        """Load the h5ad file referenced by ``data_locator`` into ``self.data``.

        The file is read with ``anndata.read_h5ad`` through the handle returned
        by ``data_locator.local_handle()``.  Backed mode (``'r'``) is requested
        when ``self.config['backed']`` is truthy, otherwise the file is read
        with ``backed=None``.

        Raises:
            ScanpyFileError: on any failure while opening or reading the file.
                The triggering exception is attached as ``__cause__`` so the
                original traceback is not lost.
        """
        try:
            # there is no guarantee data_locator indicates a local file.  The AnnData
            # API will only consume local file objects.  If we get a non-local object,
            # make a copy in tmp, and delete it after we load into memory.
            with data_locator.local_handle() as lh:
                # as of AnnData 0.6.19, backed mode performs initial load fast, but at the
                # cost of significantly slower access to X data.
                backed = 'r' if self.config['backed'] else None
                self.data = anndata.read_h5ad(lh, backed=backed)

        except ValueError as err:
            raise ScanpyFileError(
                "File must be in the .h5ad format. Please read "
                "https://github.com/theislab/scanpy_usage/blob/master/170505_seurat/info_h5ad.md to "
                "learn more about this format. You may be able to convert your file into this format "
                "using `cellxgene prepare`, please run `cellxgene prepare --help` for more "
                "information."
            ) from err
        except MemoryError as err:
            raise ScanpyFileError("Out of memory - file is too large for available memory.") from err
        except Exception as err:
            # Catch-all boundary: anything else is reported as an inaccessible or
            # malformed input file, with the original error text included.
            raise ScanpyFileError(
                f"{err} - file not found or is inaccessible.  File must be an .h5ad object.  "
                f"Please check your input and try again."
            ) from err
Beispiel #2
0
    def annotation_put_fbs(self, axis, fbs, uid=None, collection=None):
        """Decode FBS-encoded obs labels, validate them and write them out.

        Args:
            axis: must equal ``Axis.OBS``; any other value is rejected.
            fbs: encoded matrix, decoded with ``decode_matrix_fbs``.
            uid, collection: forwarded to ``get_anno_fname`` and
                ``get_anno_backup_dir`` to locate the output file and its
                backup directory.

        Returns:
            The result of ``jsonify_scanpy({"status": "OK"})``.

        Raises:
            DisabledFeatureError: ``self.config["annotations"]`` is falsy.
            ScanpyFileError: no annotation file name could be determined.
            ValueError: ``axis`` is not ``Axis.OBS``.
            KeyError: label column names are not unique or overlap the
                existing ``self.data.obs`` columns.
        """
        if not self.config["annotations"]:
            raise DisabledFeatureError("Writable annotations are not enabled")

        anno_path = self.get_anno_fname(uid, collection)
        if not anno_path:
            raise ScanpyFileError("Writable annotations - unable to determine file name for annotations")

        if axis != Axis.OBS:
            raise ValueError("Only OBS dimension access is supported")

        labels = decode_matrix_fbs(fbs)
        if not labels.empty:
            # re-key the incoming labels on the original obs index
            labels.index = self.original_obs_index
        self._validate_label_data(labels)  # paranoia

        # reject label columns that collide with existing obs columns, or with each other
        duplicate_columns = list(set(labels.columns) & set(self.data.obs.columns))
        if len(duplicate_columns) > 0 or not labels.columns.is_unique:
            raise KeyError(f"Labels file may not contain column names which overlap "
                           f"with h5ad obs columns {duplicate_columns}")

        # Writing the file is a critical section: multi-threading is often enabled.
        with self.label_lock:
            source_mtime = self.data_locator.lastmodtime()
            if source_mtime is None:
                mtime_text = "'unknown'"
            else:
                mtime_text = source_mtime.isoformat(timespec="seconds")

            header = (
                f"# Annotations generated on {datetime.now().isoformat(timespec='seconds')} "
                f"using cellxgene version {cellxgene_version}\n"
                f"# Input data file was {self.data_locator.uri_or_path}, "
                f"which was last modified on {mtime_text}\n"
            )
            backup_dir = self.get_anno_backup_dir(uid, collection)
            write_labels(anno_path, labels, header, backup_dir=backup_dir)

        return jsonify_scanpy({"status": "OK"})
Beispiel #3
0
 def _load_data(data):
     """Read ``data`` with ``sc.read`` and return the loaded object.

     Raises:
         ScanpyFileError: if ``sc.read`` fails for any reason.  The triggering
             exception is attached as ``__cause__`` so the original traceback
             is preserved.
     """
     # Based on benchmarking, cache=True has no impact on perf.
     # Note: as of current scanpy/anndata release, setting backed='r' will
     # result in an error.  https://github.com/theislab/anndata/issues/79
     try:
         return sc.read(data, cache=True)
     except ValueError as err:
         raise ScanpyFileError(
             "File must be in the .h5ad format. Please read "
             "https://github.com/theislab/scanpy_usage/blob/master/170505_seurat/info_h5ad.md to "
             "learn more about this format. You may be able to convert your file into this format "
             "using `cellxgene prepare`, please run `cellxgene prepare --help` for more "
             "information.") from err
     except Exception as err:
         # Catch-all boundary: report every other failure as a file error.
         raise ScanpyFileError(
             f"Error while loading file: {err}, File must be in the .h5ad format, please check "
             f"that your input and try again.") from err
Beispiel #4
0
 def _load_data(self, data):
     """Read ``data`` with ``anndata.read_h5ad`` and store it in ``self.data``.

     Raises:
         ScanpyFileError: if the read fails (wrong format, out of memory, or
             any other error).  The triggering exception is attached as
             ``__cause__`` so the original traceback is preserved.
     """
     # as of AnnData 0.6.19, backed mode performs initial load fast, but at the
     # cost of significantly slower access to X data.
     try:
         self.data = anndata.read_h5ad(data)
     except ValueError as err:
         raise ScanpyFileError(
             "File must be in the .h5ad format. Please read "
             "https://github.com/theislab/scanpy_usage/blob/master/170505_seurat/info_h5ad.md to "
             "learn more about this format. You may be able to convert your file into this format "
             "using `cellxgene prepare`, please run `cellxgene prepare --help` for more "
             "information.") from err
     except MemoryError as err:
         raise ScanpyFileError(
             "Error while loading file: out of memory, file is too large"
             " for memory available") from err
     except Exception as err:
         # Catch-all boundary: report every other failure as a file error.
         raise ScanpyFileError(
             f"Error while loading file: {err}, File must be in the .h5ad format, please check "
             f"that your input and try again.") from err
Beispiel #5
0
    def annotation_to_fbs_matrix(self, axis, fields=None, uid=None, collection=None):
        """Encode the annotation dataframe for ``axis`` with ``encode_matrix_fbs``.

        For ``Axis.OBS`` the base frame is ``self.data.obs``; when
        ``self.config["annotations"]`` is truthy, labels read from the file
        named by ``get_anno_fname(uid, collection)`` are joined onto it if they
        are non-empty.  For any other axis value the frame is ``self.data.var``.

        Args:
            axis: ``Axis.OBS`` selects obs annotations; anything else selects var.
            fields: optional column selection applied when non-empty.
            uid, collection: forwarded to ``get_anno_fname``.

        Returns:
            The result of ``encode_matrix_fbs`` for the selected dataframe.

        Raises:
            ScanpyFileError: if the label file cannot be read.  The triggering
                exception is attached as ``__cause__``.
        """
        if axis == Axis.OBS:
            labels = None
            if self.config["annotations"]:
                try:
                    labels = read_labels(self.get_anno_fname(uid, collection))
                except Exception as err:
                    raise ScanpyFileError(
                        f"Error while loading label file: {err}, File must be in the .csv format, please check "
                        f"your input and try again."
                    ) from err

            if labels is not None and not labels.empty:
                # second positional argument of DataFrame.join is ``on``
                df = self.data.obs.join(labels, self.config['obs_names'])
            else:
                df = self.data.obs
        else:
            df = self.data.var

        # explicit None/length test: ``fields`` may be an array-like whose
        # truth value is ambiguous
        if fields is not None and len(fields) > 0:
            df = df[fields]
        return encode_matrix_fbs(df, col_idx=df.columns)