Beispiel #1
0
    def __init__(self, filename: str, mode: str = 'r+') -> None:
        """
        Establish a connection to a .loom file.

        Args:
            filename:   Name of the .loom file to open
            mode:       read/write mode, accepts 'r+' (read/write) or
                        'r' (read-only), defaults to 'r+' without arguments,
                        and to 'r' with incorrect arguments

        Returns:
            Nothing.

        Raises:
            Exception:  Errors from opening the file propagate unchanged.
                        Errors raised while the managers are being set up
                        are logged, ``self.close()`` is called, and the
                        original exception is re-raised.
        """

        # An unrecognised mode falls back to read-only: it is the safer
        # choice when the caller's intent is unclear.
        if mode not in ('r+', 'r'):
            logging.warning(
                "Wrong mode passed to LoomConnection, using read-only")
            mode = 'r'
        self.mode = mode
        self.filename = filename
        self._file = h5py.File(filename, mode)
        self._closed = False
        try:
            # A file without a "matrix" dataset is treated as empty.
            if "matrix" in self._file:
                self.shape = self._file["/matrix"].shape
            else:
                self.shape = (0, 0)
            self.layers = loompy.LayerManager(self)
            self.view = loompy.ViewManager(self)
            self.ra = loompy.AttributeManager(self, axis=0)
            self.ca = loompy.AttributeManager(self, axis=1)
            self.attrs = loompy.FileAttributeManager(self._file)
            self.row_graphs = loompy.GraphManager(self, axis=0)
            self.col_graphs = loompy.GraphManager(self, axis=1)

            # Compatibility aliases for the older attribute names
            self.layer = self.layers
            self.row_attrs = self.ra
            self.col_attrs = self.ca

        except Exception:
            # Do not leave the file handle open behind a half-built object.
            logging.warning(
                "initialising LoomConnection to %s failed, closing file connection",
                filename)
            self.close()
            # Bare raise keeps the original traceback untouched.
            raise
Beispiel #2
0
	def scan(self, *, items: np.ndarray = None, axis: int = None, layers: Iterable = None, key: str = None, batch_size: int = 8 * 64) -> Iterable[Tuple[int, np.ndarray, loompy.LoomView]]:
		"""
		Scan across one axis and return batches of rows (columns) as LoomView objects

		Args
		----
		items: np.ndarray
			the indexes [0, 2, 13, ... ,973] of the rows/cols to include along the axis
			OR: boolean mask array giving the rows/cols to include
			if items is None, every row/col along the axis is included
		axis: int
			0:rows or 1:cols
		batch_size: int
			the number of rows/cols loaded from the file at every step of the iterator
			(must be at least 1)
		layers: iterable
			if specified it will batch scan only across some of the layers of the loom file
			if layers == None, all layers will be scanned
			if layers == [""] or "", only the default layer will be scanned
			a single layer name given as a string scans only that layer
		key:
			Name of primary key attribute. If specified, return the values sorted by the key.
			The attribute is looked up on the other axis (a row attribute when axis == 1,
			a column attribute when axis == 0) and its argsort orders that axis in each view

		Returns
		-------
		Iterable that yields triplets
		(ix, indexes, view)

		ix: int
			offset along the axis at which the current batch starts
		indexes: np.ndarray[int]
			the indexes, in the numbering of the whole axis (i.e. np.arange(ds.shape[axis])),
			of the items contained in the view; this is ix + selection
		view: LoomView
			a view corresponding to the current chunk

		Batches that contain none of the requested items are skipped and yield nothing.

		Raises
		------
		ValueError
			if axis is not given or is neither 0 nor 1, or if batch_size is smaller than 1
		"""
		if axis is None:
			raise ValueError("Axis must be given (0 = rows, 1 = cols)")
		if axis != 0 and axis != 1:
			raise ValueError("axis must be 0 or 1")
		# A non-positive batch size would never advance the offset below.
		if batch_size < 1:
			raise ValueError("batch_size must be at least 1")

		if layers is None:
			layers = self.layers.keys()
		# Materialise the layer names once: they are iterated again for every
		# batch, which a one-shot iterator would not survive. A bare string
		# (including "") names a single layer.
		if isinstance(layers, str):
			layer_names = [layers]
		else:
			layer_names = list(layers)

		if items is not None:
			items = np.asarray(items)
			# A boolean mask is converted to the positions of its True entries
			if np.issubdtype(items.dtype, np.bool_):
				items = np.where(items)[0]

		ordering: np.ndarray = None
		if axis == 1:
			if key is not None:
				ordering = np.argsort(self.ra[key])
			else:
				ordering = np.arange(self.shape[0])
			if items is None:
				items = np.arange(self.shape[1])
			cols_per_chunk = batch_size
			ix = 0
			while ix < self.shape[1]:
				# The last chunk may be shorter than batch_size
				cols_per_chunk = min(self.shape[1] - ix, cols_per_chunk)
				# Positions of the requested cells relative to the chunk start
				selection = items - ix
				# Pick out the cells that are in this batch
				selection = selection[(selection >= 0) & (selection < cols_per_chunk)]
				if selection.shape[0] == 0:
					ix += cols_per_chunk
					continue

				# Load the whole chunk from the file, then extract genes and cells using fancy indexing
				vals: Dict[str, loompy.MemoryLoomLayer] = {}
				for name in layer_names:
					temp = self.layers[name][:, ix:ix + cols_per_chunk]
					temp = temp[ordering, :]
					temp = temp[:, selection]
					vals[name] = loompy.MemoryLoomLayer(name, temp)
				lm = loompy.LayerManager(None)
				for name, mem_layer in vals.items():
					lm[name] = loompy.MemoryLoomLayer(name, mem_layer)
				view = loompy.LoomView(lm, self.ra[ordering], self.ca[ix + selection], self.row_graphs[ordering], self.col_graphs[ix + selection], filename=self.filename, file_attrs=self.attrs)
				yield (ix, ix + selection, view)
				ix += cols_per_chunk
		else:
			if key is not None:
				ordering = np.argsort(self.ca[key])
			else:
				ordering = np.arange(self.shape[1])
			if items is None:
				items = np.arange(self.shape[0])
			rows_per_chunk = batch_size
			ix = 0
			while ix < self.shape[0]:
				# The last chunk may be shorter than batch_size
				rows_per_chunk = min(self.shape[0] - ix, rows_per_chunk)
				# Positions of the requested genes relative to the chunk start
				selection = items - ix
				# Pick out the genes that are in this batch
				selection = selection[(selection >= 0) & (selection < rows_per_chunk)]
				if selection.shape[0] == 0:
					ix += rows_per_chunk
					continue

				# Load the whole chunk from the file, then extract genes and cells using fancy indexing
				vals = {}
				for name in layer_names:
					temp = self.layers[name][ix:ix + rows_per_chunk, :]
					temp = temp[:, ordering]
					temp = temp[selection, :]
					vals[name] = loompy.MemoryLoomLayer(name, temp)
				lm = loompy.LayerManager(None)
				for name, mem_layer in vals.items():
					lm[name] = loompy.MemoryLoomLayer(name, mem_layer)
				view = loompy.LoomView(lm, self.ra[ix + selection], self.ca[ordering], self.row_graphs[ix + selection], self.col_graphs[ordering], filename=self.filename, file_attrs=self.attrs)
				yield (ix, ix + selection, view)
				ix += rows_per_chunk