예제 #1
0
	def __setattr__(self, name: str, val: Any) -> None:
		"""
		Set the value of a named global attribute

		Args:
			name (str)	Name of the attribute; a leading "!" sets a regular instance attribute (without the "!") instead
			val (Any)	Value of the attribute

		Raises:
			KeyError	If the name contains a slash (/)

		Remarks:
			Nothing is stored when no file is attached (self.f is None)
			When loompy.compare_loom_spec_version reports the file as older than "3.0.0" and the file has no "attrs" group, the value is kept in the HDF5 file attributes; otherwise it is stored as a dataset in the "attrs" group
			After writing, the value is read back from the file and the materialized copy is cached in the in-memory storage
		"""
		if name.startswith("!"):
			super(GlobalAttributeManager, self).__setattr__(name[1:], val)
		elif "/" in name:
			raise KeyError("Attribute name cannot contain slash (/)")
		else:
			# NOTE(review): when self.f is None the assignment is silently discarded
			if self.f is not None:
				if loompy.compare_loom_spec_version(self.f, "3.0.0") < 0 and "attrs" not in self.f["/"]:
					normalized = loompy.normalize_attr_values(val, False)
					self.f.attrs[name] = normalized
					self.f.flush()
					val = self.f.attrs[name]
					# Read it back in to ensure it's synced and normalized
					normalized = loompy.materialize_attr_values(val)
					self.__dict__["storage"][name] = normalized
				else:
					normalized = loompy.normalize_attr_values(val, True)
					# Replace any existing dataset of the same name
					if name in self.f["attrs"]:
						del self.f["attrs"][name]
					if not np.isscalar(normalized) and normalized.dtype == np.object_:
						# Object arrays are written with an explicit variable-length string dtype,
						# through the same file handle (self.f) as every other access in this method
						self.f.create_dataset("attrs/" + name, data=normalized, dtype=h5py.special_dtype(vlen=str))
					else:
						self.f["attrs"][name] = normalized
					self.f.flush()
					val = self.f["attrs"][name][()]
					# Read it back in to ensure it's synced and normalized
					normalized = loompy.materialize_attr_values(val)
					self.__dict__["storage"][name] = normalized
예제 #2
0
	def __setattr__(self, name: str, val: np.ndarray) -> None:
		"""
		Assign a value to a named attribute

		Args:
			name (str) 			Attribute name; a leading "!" sets a regular instance attribute (without the "!")
			val (np.ndarray)	Attribute value

		Raises:
			ValueError			If the matrix dimension is non-zero and the number of values differs from it

		Remarks:
			With a dataset attached, the value is normalized by loompy.normalize_attr_values, written to the file, and the stored copy is read back into the in-memory storage
			Without a dataset, the value is only kept in the in-memory storage
		"""
		if name.startswith("!"):
			super(AttributeManager, self).__setattr__(name[1:], val)
			return
		if self.ds is None:
			# No backing dataset: remember the raw value only
			self.__dict__["storage"][name] = val
			return

		values = loompy.normalize_attr_values(val)
		# axis 0 selects the row attributes group, axis 1 the column attributes group
		group_path = ["/row_attrs/", "/col_attrs/"][self.axis]
		if self.ds.shape[self.axis] != 0:
			if values.shape[0] != self.ds.shape[self.axis]:
				raise ValueError(f"Attribute must have exactly {self.ds.shape[self.axis]} values but {len(values)} were given")

		h5file = self.ds._file
		# Drop any previous dataset of the same name before writing the new one
		if name in h5file[group_path]:
			del h5file[group_path + name]
		h5file[group_path + name] = values  # TODO: for 2D arrays, use block compression along columns/rows
		h5file.flush()
		# Cache what was actually stored, not what was passed in
		stored = h5file[group_path][name][:]
		self.__dict__["storage"][name] = loompy.materialize_attr_values(stored)
예제 #3
0
    def __setattr__(self, name: str, val: np.ndarray) -> None:
        """
        Set the value of a named attribute

        Args:
            name (str)          Name of the attribute; a leading "!" sets a
                                regular instance attribute (without the "!")
            val (np.ndarray)    Value of the attribute

        Raises:
            KeyError            If the name contains a slash (/)
            ValueError          If the matrix dimension is non-zero and the
                                number of values differs from it

        Remarks:
            Length must match the corresponding matrix dimension
            Values are normalized by loompy.normalize_attr_values, which is
            told whether the file's loom spec version is at least 3.0.0
            Without a dataset attached, the value is only kept in memory
        """
        if name.startswith("!"):
            super(AttributeManager, self).__setattr__(name[1:], val)
        elif "/" in name:
            raise KeyError("Attribute name cannot contain slash (/)")
        else:
            if self.ds is not None:
                values = loompy.normalize_attr_values(
                    val,
                    compare_loom_spec_version(self.ds._file, "3.0.0") >= 0)
                # axis 0 selects the row attributes group, axis 1 the column
                # attributes group
                a = ["/row_attrs/", "/col_attrs/"][self.axis]
                if self.ds.shape[self.axis] != 0 and values.shape[
                        0] != self.ds.shape[self.axis]:
                    raise ValueError(
                        f"Attribute '{name}' must have exactly {self.ds.shape[self.axis]} values but {len(values)} were given"
                    )
                # Replace any existing dataset of the same name
                if name in self.ds._file[a]:
                    del self.ds._file[a + name]

                if isinstance(self.ds._file, h5py.File):
                    # Object arrays are stored as variable-length strings;
                    # 2D attributes may grow along their second dimension
                    self.ds._file.create_dataset(
                        a + name,
                        data=values,
                        dtype=h5py.special_dtype(vlen=str)
                        if values.dtype == np.object_ else values.dtype,
                        maxshape=(values.shape[0], )
                        if len(values.shape) == 1 else (values.shape[0], None),
                        fletcher32=False,
                        compression="gzip",
                        shuffle=False,
                        compression_opts=2)
                else:
                    # Backends other than h5py.File get byte strings instead
                    # of objects. np.bytes_ is the same type np.string_ used
                    # to alias; np.string_ was removed in NumPy 2.0.
                    self.ds._file.create_dataset(
                        a + name,
                        data=values.astype(np.bytes_)
                        if values.dtype == np.object_ else values)

                # Stamp the dataset, its group and the file as modified
                self.ds._file[a + name].attrs["last_modified"] = timestamp()
                self.ds._file[a].attrs["last_modified"] = timestamp()
                self.ds._file.attrs["last_modified"] = timestamp()
                if isinstance(self.ds._file, h5py.File):
                    self.ds._file.flush()
                # Cache what was actually stored, not what was passed in
                self.__dict__["storage"][
                    name] = loompy.materialize_attr_values(
                        self.ds._file[a][name][:])
            else:
                self.__dict__["storage"][name] = val
예제 #4
0
 def __setattr__(self, name: str, val: Any) -> None:
     """
     Assign a value to a named file attribute

     A name starting with "!" sets a regular instance attribute (without
     the "!"). Otherwise, if a file is attached, the normalized value is
     written to the file's attributes, flushed, and the stored copy is read
     back and cached in the in-memory storage. Nothing is stored when no
     file is attached.
     """
     if name.startswith("!"):
         super(FileAttributeManager, self).__setattr__(name[1:], val)
         return
     if self.f is None:
         return
     self.f.attrs[name] = loompy.normalize_attr_values(val)
     self.f.flush()
     # Read it back in to ensure it's synced and normalized
     stored = self.f.attrs[name]
     self.__dict__["storage"][name] = loompy.materialize_attr_values(stored)
예제 #5
0
    def _to_loom(self):
        """Write a loom file from Redshift query manifests.

        The expression values are written to a genes x cells "matrix"
        dataset, cell metadata to the "col_attrs" group and gene metadata to
        the "row_attrs" group. The HDF5 file is closed before this method
        returns, including when an error interrupts the write.

        Side effects:
            Appends ".loom" to ``self.local_output_filename`` when missing and
            creates ``self.working_dir`` if it does not exist.

        Returns:
           output_path: Path to the new loom file.
        """

        def write_attr(group, attr_name, values, length):
            """Store one gzip-compressed, timestamped attribute dataset."""
            dset = group.create_dataset(
                attr_name,
                data=loompy.normalize_attr_values(values.to_numpy()),
                compression='gzip',
                compression_opts=2,
                chunks=(min(256, length), ))
            dset.attrs["last_modified"] = self._loom_timestamp()

        # Put loom on the output filename if it's not already there.
        if not self.local_output_filename.endswith(".loom"):
            self.local_output_filename += ".loom"

        # Read the row (gene) attributes and then set some conventional names
        gene_df = self.query_results[QueryType.FEATURE].load_results()
        gene_df["featurekey"] = gene_df.index

        gene_count = gene_df.shape[0]
        cell_count = self.query_results[
            QueryType.CELL].manifest["record_count"]

        os.makedirs(self.working_dir, exist_ok=True)

        loom_path = os.path.join(self.working_dir, self.local_output_filename)

        # The context manager guarantees the file is closed (and therefore
        # fully written) before the path is handed back to the caller.
        with h5py.File(loom_path, mode="w") as loom_file:
            # Set some file attributes defined in the loom spec
            loom_file.attrs["CreationDate"] = self._loom_timestamp()
            loom_file.attrs["LOOM_SPEC_VERSION"] = "2.0.1"

            # Create the hdf5 dataset that will hold all the expression data
            matrix_dataset = loom_file.create_dataset("matrix",
                                                      shape=(gene_count,
                                                             cell_count),
                                                      dtype="float32",
                                                      compression="gzip",
                                                      compression_opts=2,
                                                      chunks=(gene_count, 1))

            cellkeys = []
            cell_counter = 0

            # Iterate through the cells. For each set of cells reshape the
            # dataframe so genes are row and cells are columns. Stick that
            # data into the expression dataset.
            for cells_df in self._generate_expression_dfs(50):
                pivoted = cells_df.pivot(
                    index="featurekey", columns="cellkey",
                    values="exprvalue").reindex(
                        index=gene_df.index).fillna(0.0)
                cellkeys.extend(pivoted.columns.to_list())
                matrix_dataset[:, cell_counter:cell_counter +
                               pivoted.shape[1]] = pivoted
                cell_counter += pivoted.shape[1]
            matrix_dataset.attrs["last_modified"] = self._loom_timestamp()

            # Now write the metadata into different datasets according to the
            # loom spec. Cell metadata is reordered to match the order in
            # which the cells were written to the matrix.
            cell_df = self.query_results[
                QueryType.CELL].load_results().reindex(index=cellkeys)
            col_attrs_group = loom_file.create_group("col_attrs")
            write_attr(col_attrs_group, "CellID", cell_df.index, cell_count)
            for cell_metadata_field in cell_df:
                write_attr(col_attrs_group, cell_metadata_field,
                           cell_df[cell_metadata_field], cell_count)
            col_attrs_group.attrs["last_modified"] = self._loom_timestamp()

            row_attrs_group = loom_file.create_group("row_attrs")
            write_attr(row_attrs_group, "Accession", gene_df.index,
                       gene_count)
            write_attr(row_attrs_group, "Gene", gene_df["featurename"],
                       gene_count)
            for gene_metadata_field in gene_df:
                # "featurename" has already been written as "Gene"
                if gene_metadata_field == "featurename":
                    continue
                write_attr(row_attrs_group, gene_metadata_field,
                           gene_df[gene_metadata_field], gene_count)
            row_attrs_group.attrs["last_modified"] = self._loom_timestamp()

            # These groups are defined in the spec, but matrix service
            # outputs don't use them.
            loom_file.create_group("layers")
            loom_file.create_group("row_graphs")
            loom_file.create_group("col_graphs")

            loom_file.attrs["last_modified"] = self._loom_timestamp()

        return loom_path