def last_modified(self) -> str:
    """
    Return a compact ISO8601 timestamp (UTC timezone) telling when this layer was last modified

    Note: when the layer carries no timestamp and the file mode is 'r+', a fresh timestamp is
    written to the layer, flushed, and returned. In any other mode the value of timestamp() is
    returned without being stored.
    """
    # The unnamed layer is stored at /matrix; every named layer lives under /layers/
    path = "/matrix" if self.name == "" else "/layers/" + self.name
    h5file = self.ds._file

    if "last_modified" in h5file[path].attrs:
        return h5file[path].attrs["last_modified"]

    if h5file.mode == 'r+':
        h5file[path].attrs["last_modified"] = timestamp()
        h5file.flush()
        return h5file[path].attrs["last_modified"]

    return timestamp()
def last_modified(self, name: str = None) -> str:
    """
    Return a compact ISO8601 timestamp (UTC timezone) indicating when a graph was last modified

    Args:
        name (str)    Name of the graph, or None (the default) to query the timestamp stored
                      on the graph group of this axis as a whole

    Returns:
        The stored "last_modified" attribute if present. Otherwise, when the file mode is 'r+',
        a new timestamp is written, flushed, and returned. In any other mode the value of
        timestamp() is returned without being stored.
    """
    a = ["row_graphs", "col_graphs"][self.axis]
    # Graph groups are nested: <axis group>/<graph name>. The group name has no trailing
    # slash, so the graph must be reached by indexing the group, not by concatenating
    # the two strings.
    node = self.ds._file[a] if name is None else self.ds._file[a][name]

    if "last_modified" in node.attrs:
        return node.attrs["last_modified"]

    if self.ds._file.mode == 'r+':
        node.attrs["last_modified"] = timestamp()
        self.ds._file.flush()
        return node.attrs["last_modified"]

    return timestamp()
def last_modified(self, name: str = None) -> str:
    """
    Return a compact ISO8601 timestamp (UTC timezone) indicating when an attribute was last modified

    Args:
        name (str)    Name of the attribute, or None (the default) to query the timestamp
                      stored on the attribute group of this axis as a whole

    Note: if no timestamp is stored and the file mode is 'r+', a new timestamp is written and
    returned. Otherwise (including when the manager is not attached to a dataset) the value of
    timestamp() is returned without being stored.
    """
    a = ["/row_attrs/", "/col_attrs/"][self.axis]
    if self.ds is None:
        return timestamp()

    # Either the attribute group itself or one attribute dataset below it
    key = a if name is None else a + name

    if "last_modified" in self.ds._file[key].attrs:
        return self.ds._file[key].attrs["last_modified"]

    if self.ds._file.mode == 'r+':
        self.ds._file[key].attrs["last_modified"] = timestamp()
        # Only flushed when the backing file is an h5py.File
        if isinstance(self.ds._file, h5py.File):
            self.ds._file.flush()
        return self.ds._file[key].attrs["last_modified"]

    return timestamp()
def __setattr__(self, name: str, g: sparse.coo_matrix) -> None:
    """
    Store a named graph, both in the in-memory storage and (when attached) in the file

    Args:
        name (str)                Name of the graph; a leading "!" bypasses graph handling and
                                  sets a regular Python attribute named by the rest of the string
        g (sparse.coo_matrix)     Adjacency matrix; it is converted with sparse.coo_matrix()

    Raises:
        ValueError if the matrix is not square with the length of this axis on both sides
    """
    if name.startswith("!"):
        super(GraphManager, self).__setattr__(name[1:], g)
        return

    g = sparse.coo_matrix(g)
    if self.ds is None:
        # Not attached to a dataset: keep the graph in memory only
        self.__dict__["storage"][name] = g
        return

    a = ["row_graphs", "col_graphs"][self.axis]
    expected = self.ds.shape[self.axis]
    if not (g.shape[0] == expected and g.shape[1] == expected):
        raise ValueError(
            f"Adjacency matrix shape for axis {self.axis} must be ({self.ds.shape[self.axis]},{self.ds.shape[self.axis]}) but shape was {g.shape}"
        )

    # Replace any existing graph of the same name: drop its datasets, then the group
    if name in self.ds._file[a]:
        for part in ("a", "b", "w"):
            del self.ds._file[a][name][part]
        del self.ds._file[a][name]

    # Edges are stored as three parallel datasets: row index, column index, weight
    self.ds._file[a].create_group(name)
    for part, payload in (("a", g.row), ("b", g.col), ("w", g.data)):
        self.ds._file[a][name][part] = payload

    # Stamp the graph, the graph group, and the file itself
    self.ds._file[a][name].attrs["last_modified"] = timestamp()
    self.ds._file[a].attrs["last_modified"] = timestamp()
    self.ds._file.attrs["last_modified"] = timestamp()
    self.ds._file.flush()
    self.__dict__["storage"][name] = g
def __setattr__(self, name: str, val: np.ndarray) -> None:
    """
    Set the value of a named attribute

    Args:
        name (str)            Name of the attribute; a leading "!" bypasses attribute handling
                              and sets a regular Python attribute named by the rest of the string
        val (np.ndarray)      Value of the attribute

    Raises:
        KeyError if the name contains a slash (/)
        ValueError if the number of values does not match the (non-zero) length of this axis

    Remarks:
        When attached to a dataset, the values are passed through loompy.normalize_attr_values
        before being written, and the in-memory copy is read back from the file through
        loompy.materialize_attr_values.
    """
    if name.startswith("!"):
        super(AttributeManager, self).__setattr__(name[1:], val)
        return
    if "/" in name:
        raise KeyError("Attribute name cannot contain slash (/)")
    if self.ds is None:
        # Not attached to a dataset: keep the raw value in memory only
        self.__dict__["storage"][name] = val
        return

    values = loompy.normalize_attr_values(val)
    a = ["/row_attrs/", "/col_attrs/"][self.axis]
    axis_length = self.ds.shape[self.axis]
    # A zero-length axis accepts any length
    if axis_length != 0 and values.shape[0] != axis_length:
        raise ValueError(f"Attribute must have exactly {self.ds.shape[self.axis]} values but {len(values)} were given")

    # Overwrite semantics: remove a previous dataset of the same name first
    if name in self.ds._file[a]:
        del self.ds._file[a + name]
    self.ds._file[a + name] = values  # TODO: for 2D arrays, use block compression along columns/rows

    # Stamp the attribute, the attribute group, and the file itself
    self.ds._file[a + name].attrs["last_modified"] = timestamp()
    self.ds._file[a].attrs["last_modified"] = timestamp()
    self.ds._file.attrs["last_modified"] = timestamp()
    self.ds._file.flush()

    stored = self.ds._file[a][name][:]
    self.__dict__["storage"][name] = loompy.materialize_attr_values(stored)
def __setattr__(self, name: str, val: np.ndarray) -> None:
    """
    Set the value of a named attribute

    Args:
        name (str)            Name of the attribute; a leading "!" bypasses attribute handling
                              and sets a regular Python attribute named by the rest of the string
        val (np.ndarray)      Value of the attribute

    Raises:
        KeyError if the name contains a slash (/)
        ValueError if the number of values does not match the (non-zero) length of this axis

    Remarks:
        When attached to a dataset, the values are passed through loompy.normalize_attr_values
        (with a flag telling whether the file's spec version is at least 3.0.0) before being
        written, and the in-memory copy is read back from the file through
        loompy.materialize_attr_values.
    """
    if name.startswith("!"):
        super(AttributeManager, self).__setattr__(name[1:], val)
    elif "/" in name:
        raise KeyError("Attribute name cannot contain slash (/)")
    elif self.ds is None:
        # Not attached to a dataset: keep the raw value in memory only
        self.__dict__["storage"][name] = val
    else:
        values = loompy.normalize_attr_values(
            val, compare_loom_spec_version(self.ds._file, "3.0.0") >= 0)
        a = ["/row_attrs/", "/col_attrs/"][self.axis]
        # A zero-length axis accepts any length
        if self.ds.shape[self.axis] != 0 and values.shape[0] != self.ds.shape[self.axis]:
            raise ValueError(
                f"Attribute '{name}' must have exactly {self.ds.shape[self.axis]} values but {len(values)} were given"
            )

        # Overwrite semantics: remove a previous dataset of the same name first
        if name in self.ds._file[a]:
            del self.ds._file[a + name]

        if isinstance(self.ds._file, h5py.File):
            # Object arrays are written as variable-length strings; 2D attributes
            # get an unlimited second dimension
            self.ds._file.create_dataset(
                a + name,
                data=values,
                dtype=h5py.special_dtype(vlen=str) if values.dtype == np.object_ else values.dtype,
                maxshape=(values.shape[0], ) if len(values.shape) == 1 else (values.shape[0], None),
                fletcher32=False,
                compression="gzip",
                shuffle=False,
                compression_opts=2)
        else:
            # Backend other than h5py.File: object arrays are converted to byte strings.
            # np.bytes_ is used because the np.string_ alias was removed in NumPy 2.0.
            self.ds._file.create_dataset(
                a + name,
                data=values.astype(np.bytes_) if values.dtype == np.object_ else values)

        # Stamp the attribute, the attribute group, and the file itself
        self.ds._file[a + name].attrs["last_modified"] = timestamp()
        self.ds._file[a].attrs["last_modified"] = timestamp()
        self.ds._file.attrs["last_modified"] = timestamp()
        if isinstance(self.ds._file, h5py.File):
            self.ds._file.flush()
        self.__dict__["storage"][name] = loompy.materialize_attr_values(
            self.ds._file[a][name][:])
def __setitem__(self, slice: Tuple[Union[int, slice], Union[int, slice]], data: np.ndarray) -> None:
    """
    Write data into a slice of this layer and update the modification timestamps

    Args:
        slice     Row/column selection forwarded unchanged to the underlying dataset
        data      Values assigned to the selected region

    The unnamed layer is stored at /matrix, named layers under /layers/<name>. After the
    write, "last_modified" is refreshed on the layer and on the file, and the file is flushed.
    """
    path = '/matrix' if self.name == "" else '/layers/' + self.name
    h5file = self.ds._file
    h5file[path][slice] = data
    h5file[path].attrs["last_modified"] = timestamp()
    h5file.attrs["last_modified"] = timestamp()
    h5file.flush()
def last_modified(self) -> str:
    """
    Return an ISO8601 timestamp telling when the file was last modified

    Note: if the file carries no timestamp and its mode is 'r+', a new timestamp is written
    to the file attributes, flushed, and returned. In any other mode the value of timestamp()
    is returned without being stored.
    """
    file_attrs = self._file.attrs
    if "last_modified" in file_attrs:
        return file_attrs["last_modified"]

    if self._file.mode != "r+":
        # File cannot be written to: report a timestamp without persisting it
        return timestamp()

    # Make sure the file has modification timestamps
    self._file.attrs["last_modified"] = timestamp()
    self._file.flush()
    return self._file.attrs["last_modified"]
def downloadSubLoom(self, request, context):
    """
    Build a sub .loom file holding only the cells of one cluster and stream the progress.

    Args:
        request: message exposing loomFilePath, featureType, featureName and featureValue
        context: call context (unused here)

    Yields:
        s_pb2.DownloadSubLoomReply messages: one with isDone=False after every scanned batch,
        then a final one with isDone=True carrying the path and size of the new file.
        Nothing is yielded when request.featureType is not "clusterings".
    """
    start_time = time.time()
    loom = self.lfh.get_loom(loom_file_path=request.loomFilePath)
    loom_connection = loom.get_connection()
    meta_data = loom.get_meta_data()
    file_name = request.loomFilePath
    # Check if not a public loom file
    if '/' in request.loomFilePath:
        path_parts = request.loomFilePath.split("/")
        file_name = path_parts[1].split(".")[0]

    # Only cluster-based subsetting is supported; stop before touching any of the
    # variables that are defined solely on the clustering path.
    if request.featureType != "clusterings":
        print("This feature is currently not implemented.")
        return

    clustering = [
        c for c in meta_data["clusterings"] if c['name'] == request.featureName
    ][0]
    cluster = [
        c for c in clustering['clusters']
        if c['description'] == request.featureValue
    ][0]
    # Boolean mask over the columns (cells) that belong to the requested cluster
    cells = loom_connection.ca["Clusterings"][str(clustering['id'])] == cluster['id']
    n_cells = np.sum(cells)
    print("Number of cells in {0}: {1}".format(request.featureValue, n_cells))

    sub_loom_file_name = file_name + "_Sub_" + request.featureValue.replace(
        " ", "_").replace("/", "_")
    sub_loom_file_path = os.path.join(
        self.dfh.get_data_dirs()['Loom']['path'], "tmp",
        sub_loom_file_name + ".loom")
    # Check if the file already exists
    if os.path.exists(sub_loom_file_path):
        os.remove(sub_loom_file_path)

    # Create new file attributes
    sub_loom_file_attrs = dict()
    sub_loom_file_attrs["title"] = sub_loom_file_name
    sub_loom_file_attrs['CreationDate'] = timestamp()
    sub_loom_file_attrs["LOOM_SPEC_VERSION"] = _version.__version__
    sub_loom_file_attrs["note"] = "This loom is a subset of {0} loom file".format(
        Loom.clean_file_attr(file_attr=loom_connection.attrs["title"]))
    sub_loom_file_attrs["MetaData"] = Loom.clean_file_attr(
        file_attr=loom_connection.attrs["MetaData"])

    # - Use scan to subset cells (much faster than naive subsetting): avoid to load everything into memory
    # - Loompy bug: loompy.create_append works but generate a file much bigger than its parent
    #      So prepare all the data and create the loom afterwards
    print("Subsetting {0} cluster from the active .loom...".format(
        request.featureValue))
    # Collect the batches and concatenate once at the end instead of re-copying the
    # growing matrix on every iteration.
    sub_matrices = []
    selections = []
    n_selected = 0
    for (_, selection, _) in loom_connection.scan(items=cells, axis=1):
        sub_matrices.append(loom_connection[:, selection])
        selections.append(selection)
        n_selected += len(selection)
        # Send the progress
        processed = n_selected / n_cells
        yield s_pb2.DownloadSubLoomReply(
            loomFilePath="",
            loomFileSize=0,
            progress=s_pb2.Progress(value=processed,
                                    status="Sub Loom Created!"),
            isDone=False)
    sub_matrix = np.concatenate(sub_matrices, axis=1)
    sub_selection = np.concatenate(selections, axis=0)

    print("Creating {0} sub .loom...".format(request.featureValue))
    lp.create(sub_loom_file_path,
              sub_matrix,
              row_attrs=loom_connection.ra,
              col_attrs=loom_connection.ca[sub_selection],
              file_attrs=sub_loom_file_attrs)
    loom_file_size = os.path.getsize(sub_loom_file_path)
    print("Done!")
    print("Debug: %s seconds elapsed ---" % (time.time() - start_time))

    yield s_pb2.DownloadSubLoomReply(
        loomFilePath=sub_loom_file_path,
        loomFileSize=loom_file_size,
        progress=s_pb2.Progress(value=1.0, status="Sub Loom Created!"),
        isDone=True)
def downloadSubLoom(self, request, context):
    """
    Build a sub .loom file for a cluster or an explicit cell selection and stream the progress.

    Args:
        request: message exposing loomFilePath, featureType, featureName, featureValue and,
            for featureType "cellSelection", cellIndices
        context: call context (unused here)

    Yields:
        s_pb2.DownloadSubLoomReply messages: a start message, one per scanned batch, a
        near-complete message (all with isDone=False), then a final one with isDone=True
        carrying the path and size of the new file. Nothing is yielded for any other
        featureType.
    """
    start_time = time.time()

    loom = self.lfh.get_loom(loom_file_path=Path(request.loomFilePath))
    loom_connection = loom.get_connection()
    meta_data = loom.get_meta_data()

    file_name = request.loomFilePath
    # Check if not a public loom file
    if "/" in request.loomFilePath:
        loom_name = request.loomFilePath.split("/")
        file_name = loom_name[1].split(".")[0]

    if request.featureType == "clusterings":
        a = list(
            filter(lambda x: x["name"] == request.featureName,
                   meta_data["clusterings"]))
        b = list(
            filter(lambda x: x["description"] == request.featureValue,
                   a[0]["clusters"]))[0]
        # Boolean mask over the columns (cells) that belong to the requested cluster
        cells = loom_connection.ca["Clusterings"][str(a[0]["id"])] == b["id"]
        logger.debug("Number of cells in {0}: {1}".format(
            request.featureValue, np.sum(cells)))
        sub_loom_file_name = file_name + "_Sub_" + request.featureValue.replace(
            " ", "_").replace("/", "_")
    elif request.featureType == "cellSelection":
        # Turn the list of selected indices into a boolean mask over all cells
        cells = np.full(loom.get_nb_cells(), False)
        cells[request.cellIndices] = True
        logger.debug(
            f"Number of cells in selection: {len(request.cellIndices)}")
        sub_loom_file_name = (
            f"{file_name}_CellSelection_{request.featureValue}_{datetime.datetime.now().strftime('%y%m%d_%H%M')}"
        )
    else:
        logger.error("This feature is currently not implemented.")
        return

    # exist_ok avoids the check-then-create race of exists() followed by mkdir()
    tmp_dir = os.path.join(self.dfh.get_data_dirs()["Loom"]["path"], "tmp")
    os.makedirs(tmp_dir, exist_ok=True)
    sub_loom_file_path = os.path.join(tmp_dir, sub_loom_file_name + ".loom")
    # Check if the file already exists
    if os.path.exists(sub_loom_file_path):
        os.remove(sub_loom_file_path)

    # Create new file attributes
    sub_loom_file_attrs = dict()
    sub_loom_file_attrs["title"] = sub_loom_file_name
    sub_loom_file_attrs["CreationDate"] = timestamp()
    sub_loom_file_attrs["LOOM_SPEC_VERSION"] = _version.__version__
    if "title" in loom_connection.attrs:
        sub_loom_file_attrs[
            "note"] = f"This loom is a subset of {Loom.clean_file_attr(file_attr=loom_connection.attrs['title'])} loom file"
    else:
        sub_loom_file_attrs[
            "note"] = f"This loom is a subset of {request.loomFilePath} loom file"
    sub_loom_file_attrs["MetaData"] = Loom.clean_file_attr(
        file_attr=loom_connection.attrs["MetaData"])

    # - Use scan to subset cells (much faster than naive subsetting): avoid to load everything into memory
    # - Loompy bug: loompy.create_append works but generate a file much bigger than its parent
    #      So prepare all the data and create the loom afterwards
    logger.debug("Subsetting {0} cluster from the active .loom...".format(
        request.featureValue))
    processed = 0
    tot_cells = loom.get_nb_cells()
    yield s_pb2.DownloadSubLoomReply(
        loomFilePath="",
        loomFileSize=0,
        progress=s_pb2.Progress(value=0.01,
                                status="Sub Loom creation started!"),
        isDone=False,
    )

    sub_matrices = []
    for (idx, _, view) in loom_connection.scan(items=cells,
                                               axis=1,
                                               batch_size=5120):
        sub_matrices.append(view[:, :])
        # Send the progress
        processed = idx / tot_cells
        yield s_pb2.DownloadSubLoomReply(
            loomFilePath="",
            loomFileSize=0,
            progress=s_pb2.Progress(value=processed,
                                    status="Sub Loom Created!"),
            isDone=False,
        )
    yield s_pb2.DownloadSubLoomReply(
        loomFilePath="",
        loomFileSize=0,
        progress=s_pb2.Progress(value=0.99, status="Sub Loom Created!"),
        isDone=False,
    )
    sub_matrix = np.concatenate(sub_matrices, axis=1)
    logger.debug("Creating {0} sub .loom...".format(request.featureValue))
    lp.create(
        sub_loom_file_path,
        sub_matrix,
        row_attrs=loom_connection.ra,
        col_attrs=loom_connection.ca[cells],
        file_attrs=sub_loom_file_attrs,
    )
    # Drop the reference to the concatenated matrix as soon as it has been written
    del sub_matrix
    loom_file_size = os.path.getsize(sub_loom_file_path)
    logger.debug(
        "{0:.5f} seconds elapsed making loom ---".format(time.time() -
                                                         start_time))

    yield s_pb2.DownloadSubLoomReply(
        loomFilePath=sub_loom_file_path,
        loomFileSize=loom_file_size,
        progress=s_pb2.Progress(value=1.0, status="Sub Loom Created!"),
        isDone=True,
    )
def create(filename: str, layers: Union[np.ndarray, Dict[str, np.ndarray], loompy.LayerManager], row_attrs: Dict[str, np.ndarray], col_attrs: Dict[str, np.ndarray], *, file_attrs: Dict[str, str] = None) -> None:
    """
    Create a new .loom file from the given data.

    Args:
        filename (str):         The filename (typically using a `.loom` file extension)
        layers (np.ndarray or scipy.sparse or Dict[str, np.ndarray] or LayerManager):
                                Two-dimensional (N-by-M) numpy ndarray of float values
                                Or sparse matrix
                                Or dictionary of named layers, each an N-by-M ndarray
                                Or LayerManager, each layer an N-by-M ndarray
        row_attrs (dict):       Row attributes, where keys are attribute names and values
                                are numpy arrays (float or string) of length N
        col_attrs (dict):       Column attributes, where keys are attribute names and
                                values are numpy arrays (float or string) of length M
        file_attrs (dict):      Global attributes, where keys are attribute names and
                                values are strings
    Returns:
        Nothing

    Raises:
        ValueError if no default layer (key "") is provided, or if writing the layers or
        attributes raises ValueError; in the latter case the partially written file is removed.

    Remarks:
        If the file exists, it will be overwritten. See create_append for a function that will append to existing files.
    """
    if filename.startswith("~/"):
        filename = os.path.expanduser(filename)
    if file_attrs is None:
        file_attrs = {}

    if isinstance(layers, np.ndarray):
        layers = {"": layers}
    elif scipy.sparse.issparse(layers):
        # Sparse input is delegated entirely to the dedicated helper
        _create_sparse(filename, layers, row_attrs, col_attrs, file_attrs=file_attrs)
        return
    elif isinstance(layers, loompy.LayerManager):
        layers = {k: v[:, :] for k, v in layers.items()}
    if "" not in layers:
        raise ValueError("Data for default layer must be provided")

    # Create the file (empty).
    # Yes, this might cause an exception, which we prefer to send to the caller.
    # The context manager makes sure the handle is closed even in that case.
    with h5py.File(name=filename, mode='w') as f:
        for group in ('/layers', '/row_attrs', '/col_attrs', '/row_graphs', '/col_graphs'):
            f.create_group(group)
        f.flush()

    # Stays None if connect() itself fails, so the handler never touches an unbound name
    ds = None
    try:
        with connect(filename) as ds:
            for key, vals in layers.items():
                ds.layer[key] = vals

            for key, vals in row_attrs.items():
                ds.ra[key] = vals

            for key, vals in col_attrs.items():
                ds.ca[key] = vals

            for key, vals in file_attrs.items():
                ds.attrs[key] = vals

            # store creation date (overrides any value supplied through file_attrs)
            ds.attrs['CreationDate'] = timestamp()
            ds.attrs["LOOM_SPEC_VERSION"] = loompy.loom_spec_version

    except ValueError:
        # Do not leave a half-written file behind
        if ds is not None:
            ds.close(suppress_warning=True)
        os.remove(filename)
        raise