Example #1
    def last_modified(self) -> str:
        """
        Return a compact ISO8601 timestamp (UTC timezone) indicating when the layer was last modified.

        Note: if the layer does not contain a timestamp and the mode is 'r+', a new timestamp will be set and returned.
        Otherwise, the current time in UTC will be returned.
        """
        # The main matrix lives at /matrix; named layers live under /layers/<name>
        path = "/matrix" if self.name == "" else "/layers/" + self.name
        if "last_modified" in self.ds._file[path].attrs:
            return self.ds._file[path].attrs["last_modified"]
        elif self.ds._file.mode == 'r+':
            # No timestamp yet and the file is writable: create one now
            self.ds._file[path].attrs["last_modified"] = timestamp()
            self.ds._file.flush()
            return self.ds._file[path].attrs["last_modified"]
        return timestamp()
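As a usage sketch (the file name is hypothetical; assumes loompy's public connect() and layers API), the method above could be called like this:

# Usage sketch: reading a layer's modification time (hypothetical file name)
import loompy

with loompy.connect("example.loom") as ds:
    print(ds.layers[""].last_modified())  # timestamp of the main matrix layer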
Example #2
    def last_modified(self, name: str = None) -> str:
        """
        Return a compact ISO8601 timestamp (UTC timezone) indicating when a graph was last modified.

        Note: if no graph name is given (the default), the modification time of the most recently modified graph will be returned.
        Note: if the graphs do not contain a timestamp and the mode is 'r+', a new timestamp is created and returned.
        Otherwise, the current time in UTC will be returned.
        """
        a = ["row_graphs", "col_graphs"][self.axis]
        # Group-level timestamp when no name is given; otherwise the named graph's timestamp
        target = self.ds._file[a] if name is None else self.ds._file[a][name]

        if "last_modified" in target.attrs:
            return target.attrs["last_modified"]
        elif self.ds._file.mode == 'r+':
            target.attrs["last_modified"] = timestamp()
            self.ds._file.flush()
            return target.attrs["last_modified"]
        return timestamp()
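A similar sketch for graphs (hypothetical file and graph names; assumes loompy's col_graphs GraphManager):

# Usage sketch: reading graph modification times (hypothetical file and graph names)
import loompy

with loompy.connect("example.loom") as ds:
    print(ds.col_graphs.last_modified())       # most recently modified column graph
    print(ds.col_graphs.last_modified("KNN"))  # a specific graph, if present in the file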
Example #3
    def last_modified(self, name: str = None) -> str:
        """
        Return a compact ISO8601 timestamp (UTC timezone) indicating when an attribute was last modified.

        Note: if no attribute name is given (the default), the modification time of the most recently modified attribute will be returned.
        Note: if the attributes do not contain a timestamp and the mode is 'r+', a new timestamp is created and returned.
        Otherwise, the current time in UTC will be returned.
        """
        a = ["/row_attrs/", "/col_attrs/"][self.axis]

        if self.ds is not None:
            # Group-level timestamp when no name is given; otherwise the named attribute's timestamp
            path = a if name is None else a + name
            if "last_modified" in self.ds._file[path].attrs:
                return self.ds._file[path].attrs["last_modified"]
            elif self.ds._file.mode == 'r+':
                self.ds._file[path].attrs["last_modified"] = timestamp()
                if isinstance(self.ds._file, h5py.File):
                    self.ds._file.flush()
                return self.ds._file[path].attrs["last_modified"]
        return timestamp()
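All three methods rely on a timestamp() helper. A minimal sketch of such a helper, assuming only what the docstrings state (a compact ISO8601 string in UTC); the library's actual format may differ:

from datetime import datetime, timezone

def timestamp() -> str:
    # Compact ISO8601 timestamp in UTC, e.g. "20240101T120000.000000Z"
    # (illustrative only; the exact format used by the library may differ)
    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")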
Example #4
    def __setattr__(self, name: str, g: sparse.coo_matrix) -> None:
        if name.startswith("!"):
            # Names prefixed with "!" bypass storage and set a plain instance attribute
            super(GraphManager, self).__setattr__(name[1:], g)
        else:
            g = sparse.coo_matrix(g)
            if self.ds is not None:
                a = ["row_graphs", "col_graphs"][self.axis]
                if g.shape[0] != self.ds.shape[self.axis] or g.shape[1] != self.ds.shape[self.axis]:
                    raise ValueError(
                        f"Adjacency matrix shape for axis {self.axis} must be ({self.ds.shape[self.axis]},{self.ds.shape[self.axis]}) but shape was {g.shape}"
                    )
                if name in self.ds._file[a]:
                    # Remove any existing graph with this name before overwriting it
                    del self.ds._file[a][name]["a"]
                    del self.ds._file[a][name]["b"]
                    del self.ds._file[a][name]["w"]
                    del self.ds._file[a][name]
                # Store the graph in COO format: row indices ("a"), column indices ("b") and weights ("w")
                self.ds._file[a].create_group(name)
                self.ds._file[a][name]["a"] = g.row
                self.ds._file[a][name]["b"] = g.col
                self.ds._file[a][name]["w"] = g.data
                self.ds._file[a][name].attrs["last_modified"] = timestamp()
                self.ds._file[a].attrs["last_modified"] = timestamp()
                self.ds._file.attrs["last_modified"] = timestamp()
                self.ds._file.flush()
                self.__dict__["storage"][name] = g
            else:
                self.__dict__["storage"][name] = g
Example #5
	def __setattr__(self, name: str, val: np.ndarray) -> None:
		"""
		Set the value of a named attribute

		Args:
			name (str) 			Name of the attribute
			val (np.ndarray)	Value of the attribute

		Remarks:
			Length must match the corresponding matrix dimension
			The values are automatically HTML escaped and converted to ASCII for storage
		"""
		if name.startswith("!"):
			super(AttributeManager, self).__setattr__(name[1:], val)
		elif "/" in name:
			raise KeyError("Attribute name cannot contain slash (/)")
		else:
			if self.ds is not None:
				values = loompy.normalize_attr_values(val)
				a = ["/row_attrs/", "/col_attrs/"][self.axis]
				if self.ds.shape[self.axis] != 0 and values.shape[0] != self.ds.shape[self.axis]:
					raise ValueError(f"Attribute must have exactly {self.ds.shape[self.axis]} values but {len(values)} were given")
				if name in self.ds._file[a]:
					del self.ds._file[a + name]
				self.ds._file[a + name] = values  # TODO: for 2D arrays, use block compression along columns/rows
				self.ds._file[a + name].attrs["last_modified"] = timestamp()
				self.ds._file[a].attrs["last_modified"] = timestamp()
				self.ds._file.attrs["last_modified"] = timestamp()
				self.ds._file.flush()
				self.__dict__["storage"][name] = loompy.materialize_attr_values(self.ds._file[a][name][:])
			else:
				self.__dict__["storage"][name] = val
Example #6
    def __setattr__(self, name: str, val: np.ndarray) -> None:
        """
		Set the value of a named attribute

		Args:
			name (str) 			Name of the attribute
			val (np.ndarray)	Value of the attribute

		Remarks:
			Length must match the corresponding matrix dimension
			The values are automatically HMTL escaped and converted to ASCII for storage
		"""
        if name.startswith("!"):
            super(AttributeManager, self).__setattr__(name[1:], val)
        elif "/" in name:
            raise KeyError("Attribute name cannot contain slash (/)")
        else:
            if self.ds is not None:
                values = loompy.normalize_attr_values(
                    val,
                    compare_loom_spec_version(self.ds._file, "3.0.0") >= 0)
                a = ["/row_attrs/", "/col_attrs/"][self.axis]
                if self.ds.shape[self.axis] != 0 and values.shape[0] != self.ds.shape[self.axis]:
                    raise ValueError(
                        f"Attribute '{name}' must have exactly {self.ds.shape[self.axis]} values but {len(values)} were given"
                    )
                if name in self.ds._file[a]:
                    del self.ds._file[a + name]

                if isinstance(self.ds._file, h5py.File):
                    self.ds._file.create_dataset(
                        a + name,
                        data=values,
                        dtype=h5py.special_dtype(vlen=str)
                        if values.dtype == np.object_ else values.dtype,
                        maxshape=(values.shape[0], )
                        if len(values.shape) == 1 else (values.shape[0], None),
                        fletcher32=False,
                        compression="gzip",
                        shuffle=False,
                        compression_opts=2)
                else:
                    self.ds._file.create_dataset(
                        a + name,
                        data=values.astype(np.string_)
                        if values.dtype == np.object_ else values)

                self.ds._file[a + name].attrs["last_modified"] = timestamp()
                self.ds._file[a].attrs["last_modified"] = timestamp()
                self.ds._file.attrs["last_modified"] = timestamp()
                if isinstance(self.ds._file, h5py.File):
                    self.ds._file.flush()
                self.__dict__["storage"][
                    name] = loompy.materialize_attr_values(
                        self.ds._file[a][name][:])
            else:
                self.__dict__["storage"][name] = val
Example #7
	def __setitem__(self, slice: Tuple[Union[int, slice], Union[int, slice]], data: np.ndarray) -> None:
		if self.name == "":
			self.ds._file['/matrix'][slice] = data
			self.ds._file["/matrix"].attrs["last_modified"] = timestamp()
			self.ds._file.attrs["last_modified"] = timestamp()
			self.ds._file.flush()
		else:
			self.ds._file['/layers/' + self.name][slice] = data
			self.ds._file["/layers/" + self.name].attrs["last_modified"] = timestamp()
			self.ds._file.attrs["last_modified"] = timestamp()
			self.ds._file.flush()
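A usage sketch for slice assignment on the main matrix layer (hypothetical file name; assumes loompy's connect() API):

# Usage sketch: writing a block of the main matrix (hypothetical file name)
import loompy

with loompy.connect("example.loom", "r+") as ds:
    block = ds[:, 0:10]      # read a block of the main matrix
    ds[:, 0:10] = block * 0  # write it back zeroed; "last_modified" is updated as shown above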
Example #8
	def last_modified(self) -> str:
		"""
		Return an ISO8601 timestamp indicating when the file was last modified

		Note: if the file has no timestamp, and mode is 'r+', a new timestamp is created and returned.
		Otherwise, the current time in UTC is returned
		"""
		if "last_modified" in self._file.attrs:
			return self._file.attrs["last_modified"]
		elif self._file.mode == "r+":
			# Make sure the file has modification timestamps
			self._file.attrs["last_modified"] = timestamp()
			self._file.flush()
			return self._file.attrs["last_modified"]
		return timestamp()
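A usage sketch for the file-level timestamp (hypothetical file name):

# Usage sketch: file-level modification time (hypothetical file name)
import loompy

with loompy.connect("example.loom") as ds:
    print(ds.last_modified())  # created on demand when the file is opened in 'r+' mode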
Example #9
    def downloadSubLoom(self, request, context):
        start_time = time.time()

        loom = self.lfh.get_loom(loom_file_path=request.loomFilePath)
        loom_connection = loom.get_connection()
        meta_data = loom.get_meta_data()

        file_name = request.loomFilePath
        # Check if not a public loom file
        if '/' in request.loomFilePath:
            loom_name = request.loomFilePath.split("/")
            file_name = loom_name[1].split(".")[0]

        if (request.featureType == "clusterings"):
            a = list(
                filter(lambda x: x['name'] == request.featureName,
                       meta_data["clusterings"]))
            b = list(
                filter(lambda x: x['description'] == request.featureValue,
                       a[0]['clusters']))[0]
            cells = loom_connection.ca["Clusterings"][str(
                a[0]['id'])] == b['id']
            print("Number of cells in {0}: {1}".format(request.featureValue,
                                                       np.sum(cells)))
            sub_loom_file_name = file_name + "_Sub_" + request.featureValue.replace(
                " ", "_").replace("/", "_")
            sub_loom_file_path = os.path.join(
                self.dfh.get_data_dirs()['Loom']['path'], "tmp",
                sub_loom_file_name + ".loom")
            # Check if the file already exists
            if os.path.exists(path=sub_loom_file_path):
                os.remove(path=sub_loom_file_path)
            # Create new file attributes
            sub_loom_file_attrs = dict()
            sub_loom_file_attrs["title"] = sub_loom_file_name
            sub_loom_file_attrs['CreationDate'] = timestamp()
            sub_loom_file_attrs["LOOM_SPEC_VERSION"] = _version.__version__
            sub_loom_file_attrs[
                "note"] = "This loom is a subset of {0} loom file".format(
                    Loom.clean_file_attr(
                        file_attr=loom_connection.attrs["title"]))
            sub_loom_file_attrs["MetaData"] = Loom.clean_file_attr(
                file_attr=loom_connection.attrs["MetaData"])
            # - Use scan to subset cells (much faster than naive subsetting): avoids loading everything into memory
            # - Loompy bug: loompy.create_append works but generates a file much bigger than its parent,
            #   so prepare all the data and create the loom afterwards
            print("Subsetting {0} cluster from the active .loom...".format(
                request.featureValue))
            sub_matrix = None
            sub_selection = None
            for (_, selection, _) in loom_connection.scan(items=cells, axis=1):
                if sub_matrix is None:
                    sub_matrix = loom_connection[:, selection]
                    sub_selection = selection
                else:
                    sub_matrix = np.concatenate(
                        (sub_matrix, loom_connection[:, selection]), axis=1)
                    sub_selection = np.concatenate((sub_selection, selection),
                                                   axis=0)
                # Send the progress
                processed = len(sub_selection) / sum(cells)
                yield s_pb2.DownloadSubLoomReply(
                    loomFilePath="",
                    loomFileSize=0,
                    progress=s_pb2.Progress(value=processed,
                                            status="Sub Loom Created!"),
                    isDone=False)
            print("Creating {0} sub .loom...".format(request.featureValue))
            lp.create(sub_loom_file_path,
                      sub_matrix,
                      row_attrs=loom_connection.ra,
                      col_attrs=loom_connection.ca[sub_selection],
                      file_attrs=sub_loom_file_attrs)
            with open(sub_loom_file_path, 'r') as fh:
                loom_file_size = os.fstat(fh.fileno())[6]
            print("Done!")
            print("Debug: %s seconds elapsed ---" % (time.time() - start_time))
        else:
            print("This feature is currently not implemented.")
            # Nothing was created, so end the stream here instead of referencing undefined variables
            return
        yield s_pb2.DownloadSubLoomReply(loomFilePath=sub_loom_file_path,
                                         loomFileSize=loom_file_size,
                                         progress=s_pb2.Progress(
                                             value=1.0,
                                             status="Sub Loom Created!"),
                                         isDone=True)
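The core subsetting pattern used above, reduced to a minimal standalone sketch (file name, attribute name and cluster id are hypothetical; assumes loompy's scan() and create()):

# Minimal sketch of the scan-and-subset pattern used above (hypothetical names throughout)
import numpy as np
import loompy as lp

with lp.connect("parent.loom", "r") as ds:
    cells = ds.ca["ClusterID"] == 3  # boolean mask over columns (hypothetical attribute and id)
    blocks, cols = [], []
    for (_, selection, view) in ds.scan(items=cells, axis=1):
        blocks.append(view[:, :])
        cols.append(selection)
    sub_matrix = np.concatenate(blocks, axis=1)
    sub_cols = np.concatenate(cols, axis=0)
    lp.create("subset.loom", sub_matrix,
              row_attrs=ds.ra, col_attrs=ds.ca[sub_cols],
              file_attrs={"title": "subset"})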
Example #10
    def downloadSubLoom(self, request, context):
        start_time = time.time()

        loom = self.lfh.get_loom(loom_file_path=Path(request.loomFilePath))
        loom_connection = loom.get_connection()
        meta_data = loom.get_meta_data()

        file_name = request.loomFilePath
        # Check if not a public loom file
        if "/" in request.loomFilePath:
            loom_name = request.loomFilePath.split("/")
            file_name = loom_name[1].split(".")[0]

        if request.featureType == "clusterings":
            a = list(
                filter(lambda x: x["name"] == request.featureName,
                       meta_data["clusterings"]))
            b = list(
                filter(lambda x: x["description"] == request.featureValue,
                       a[0]["clusters"]))[0]
            cells = loom_connection.ca["Clusterings"][str(
                a[0]["id"])] == b["id"]
            logger.debug("Number of cells in {0}: {1}".format(
                request.featureValue, np.sum(cells)))
            sub_loom_file_name = file_name + "_Sub_" + request.featureValue.replace(
                " ", "_").replace("/", "_")
        elif request.featureType == "cellSelection":
            cells = np.full(loom.get_nb_cells(), False)
            cells[request.cellIndices] = True
            logger.debug(
                f"Number of cells in selection: {len(request.cellIndices)}")
            sub_loom_file_name = (
                f"{file_name}_CellSelection_{request.featureValue}_{datetime.datetime.now().strftime('%y%m%d_%H%M')}"
            )
        else:
            logger.error("This feature is currently not implemented.")
            return

        if not os.path.exists(
                os.path.join(self.dfh.get_data_dirs()["Loom"]["path"], "tmp")):
            os.mkdir(
                os.path.join(self.dfh.get_data_dirs()["Loom"]["path"], "tmp"))
        sub_loom_file_path = os.path.join(
            self.dfh.get_data_dirs()["Loom"]["path"], "tmp",
            sub_loom_file_name + ".loom")
        # Check if the file already exists
        if os.path.exists(path=sub_loom_file_path):
            os.remove(path=sub_loom_file_path)
        # Create new file attributes
        sub_loom_file_attrs = dict()
        sub_loom_file_attrs["title"] = sub_loom_file_name
        sub_loom_file_attrs["CreationDate"] = timestamp()
        sub_loom_file_attrs["LOOM_SPEC_VERSION"] = _version.__version__
        if "title" in loom_connection.attrs:
            sub_loom_file_attrs[
                "note"] = f"This loom is a subset of {Loom.clean_file_attr(file_attr=loom_connection.attrs['title'])} loom file"
        else:
            sub_loom_file_attrs[
                "note"] = f"This loom is a subset of {request.loomFilePath} loom file"
        sub_loom_file_attrs["MetaData"] = Loom.clean_file_attr(
            file_attr=loom_connection.attrs["MetaData"])
        # - Use scan to subset cells (much faster than naive subsetting): avoids loading everything into memory
        # - Loompy bug: loompy.create_append works but generates a file much bigger than its parent,
        #   so prepare all the data and create the loom afterwards
        logger.debug("Subsetting {0} cluster from the active .loom...".format(
            request.featureValue))
        processed = 0
        tot_cells = loom.get_nb_cells()
        yield s_pb2.DownloadSubLoomReply(
            loomFilePath="",
            loomFileSize=0,
            progress=s_pb2.Progress(value=0.01,
                                    status="Sub Loom creation started!"),
            isDone=False,
        )
        sub_matrices = []
        for (idx, _, view) in loom_connection.scan(items=cells,
                                                   axis=1,
                                                   batch_size=5120):
            sub_matrices.append(view[:, :])
            # Send the progress
            processed = idx / tot_cells
            yield s_pb2.DownloadSubLoomReply(
                loomFilePath="",
                loomFileSize=0,
                progress=s_pb2.Progress(value=processed,
                                        status="Sub Loom Created!"),
                isDone=False,
            )
        yield s_pb2.DownloadSubLoomReply(
            loomFilePath="",
            loomFileSize=0,
            progress=s_pb2.Progress(value=0.99, status="Sub Loom Created!"),
            isDone=False,
        )
        sub_matrix = np.concatenate(sub_matrices, axis=1)
        logger.debug("Creating {0} sub .loom...".format(request.featureValue))
        lp.create(
            sub_loom_file_path,
            sub_matrix,
            row_attrs=loom_connection.ra,
            col_attrs=loom_connection.ca[cells],
            file_attrs=sub_loom_file_attrs,
        )
        del sub_matrix
        with open(sub_loom_file_path, "r") as fh:
            loom_file_size = os.fstat(fh.fileno())[6]
        logger.debug(
            "{0:.5f} seconds elapsed making loom ---".format(time.time() -
                                                             start_time))

        yield s_pb2.DownloadSubLoomReply(
            loomFilePath=sub_loom_file_path,
            loomFileSize=loom_file_size,
            progress=s_pb2.Progress(value=1.0, status="Sub Loom Created!"),
            isDone=True,
        )
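The cellSelection branch above builds its column mask from explicit indices; the same idea as a tiny standalone sketch (all values hypothetical):

# Sketch: boolean column mask from explicit cell indices (hypothetical values)
import numpy as np

n_cells = 10000             # total number of columns in the parent loom
cell_indices = [5, 17, 42]  # hypothetical selection, e.g. request.cellIndices
cells = np.full(n_cells, False)
cells[cell_indices] = True  # usable as items= in loom_connection.scan(axis=1)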
Example #11
def create(filename: str, layers: Union[np.ndarray, Dict[str, np.ndarray], loompy.LayerManager], row_attrs: Dict[str, np.ndarray], col_attrs: Dict[str, np.ndarray], *, file_attrs: Dict[str, str] = None) -> None:
	"""
	Create a new .loom file from the given data.

	Args:
		filename (str):         The filename (typically using a `.loom` file extension)
		layers (np.ndarray or scipy.sparse or Dict[str, np.ndarray] or LayerManager):
								Two-dimensional (N-by-M) numpy ndarray of float values
								Or sparse matrix
								Or dictionary of named layers, each an N-by-M ndarray
								or LayerManager, each layer an N-by-M ndarray
		row_attrs (dict):       Row attributes, where keys are attribute names and values
								are numpy arrays (float or string) of length N
		col_attrs (dict):       Column attributes, where keys are attribute names and
								values are numpy arrays (float or string) of length M
		file_attrs (dict):      Global attributes, where keys are attribute names and
								values are strings
	Returns:
		Nothing

	Remarks:
		If the file exists, it will be overwritten. See create_append for a function that will append to existing files.
	"""
	if filename.startswith("~/"):
		filename = os.path.expanduser(filename)
	if file_attrs is None:
		file_attrs = {}

	if isinstance(layers, np.ndarray):
		layers = {"": layers}
	elif scipy.sparse.issparse(layers):
		_create_sparse(filename, layers, row_attrs, col_attrs, file_attrs=file_attrs)
		return
	elif isinstance(layers, loompy.LayerManager):
		layers = {k: v[:, :] for k, v in layers.items()}
	if "" not in layers:
		raise ValueError("Data for default layer must be provided")

	# Create the file (empty).
	# Yes, this might cause an exception, which we prefer to send to the caller
	f = h5py.File(name=filename, mode='w')
	f.create_group('/layers')
	f.create_group('/row_attrs')
	f.create_group('/col_attrs')
	f.create_group('/row_graphs')
	f.create_group('/col_graphs')
	f.flush()
	f.close()

	try:
		with connect(filename) as ds:
			for key, vals in layers.items():
				ds.layer[key] = vals

			for key, vals in row_attrs.items():
				ds.ra[key] = vals

			for key, vals in col_attrs.items():
				ds.ca[key] = vals

			for key in file_attrs:
				ds.attrs[key] = file_attrs[key]

			# store creation date
			ds.attrs['CreationDate'] = timestamp()
			ds.attrs["LOOM_SPEC_VERSION"] = loompy.loom_spec_version

	except ValueError as ve:
		ds.close(suppress_warning=True)
		os.remove(filename)
		raise ve
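A usage sketch for create() (file name and attribute values are hypothetical):

# Usage sketch: creating a small .loom file (hypothetical file name and attributes)
import numpy as np
import loompy

matrix = np.random.randint(0, 10, size=(100, 20))
row_attrs = {"Gene": np.array([f"Gene_{i}" for i in range(100)])}
col_attrs = {"CellID": np.array([f"Cell_{j}" for j in range(20)])}
loompy.create("example.loom", matrix, row_attrs, col_attrs,
              file_attrs={"title": "example"})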