def test_get_coordinates(loom_file):
    """A custom embedding stored in the loom is returned with the Y axis negated."""
    matrix, row_attrs, col_attrs, attrs = loom_file
    # POC for custom data in the loom file: inject a synthetic embedding.
    num_cells = 100
    x_vals = np.concatenate(
        [rg.normal(centre, 0.1, num_cells // 4) for centre in range(-2, 2)])
    y_vals = rg.normal(0, 0.1, num_cells)
    main_embedding = pd.DataFrame({"_X": x_vals, "_Y": y_vals})
    col_attrs["Embedding"] = Loom.dfToNamedMatrix(main_embedding)
    lp.create(filename=str(LOOM_PATH),
              layers=matrix,
              row_attrs=row_attrs,
              col_attrs=col_attrs,
              file_attrs=attrs)
    with lp.connect(LOOM_PATH, mode="r", validate=False) as ds:
        test_loom = Loom(LOOM_PATH, LOOM_PATH, ds, LOOM_FILE_HANDLER)
        # Embedding id -1 is the main embedding; the viewer flips Y.
        np.testing.assert_equal(
            test_loom.get_coordinates(-1),
            {
                "x": x_vals,
                "y": -y_vals,
                "cellIndices": list(range(num_cells)),
            })
def label_all_clusters(loom: Loom, embedding: int, feature: str) -> List[FeatureLabel]:
    """
    Extract and group cells based on clustering.

    Place labels for each cluster at the barycentre of the cluster.
    """
    meta_data = loom.get_meta_data()
    wanted_name = re.sub("^Clustering: ", "", feature)
    for clustering in meta_data["clusterings"]:
        if clustering["name"] == wanted_name:
            clustering_id = str(clustering["id"])

    cluster_names_dict = loom.get_cluster_names(int(clustering_id))
    label_set = set()
    palette = constant.BIG_COLOR_LIST
    for cluster_id in uniq(loom.get_clustering_by_id(int(clustering_id))):
        if cluster_id == -1:
            # Sentinel colour for cells that belong to no cluster.
            label_set.add((cluster_id, "Unclustered", "XX" * 3))
            continue
        label_set.add((cluster_id, cluster_names_dict[cluster_id],
                       palette[cluster_id % len(palette)]))
    cluster_ids, clusters, colours = zip(*label_set)

    def labels() -> Generator[FeatureLabel, None, None]:
        for idx, cluster in enumerate(clusters):
            coords = loom.get_coordinates(
                coordinatesID=embedding,
                cluster_info=(int(clustering_id), int(cluster_ids[idx])))
            # Label sits at the barycentre of the cluster's cells.
            yield FeatureLabel(
                label=cluster,
                colour=colours[idx],
                coordinate=Coordinate(x=np.mean(coords["x"]),
                                      y=np.mean(coords["y"])),
            )

    return list(labels())
def test_get_abs_file_path(loom_file):
    """The loom reports the absolute path it was opened from."""
    matrix, row_attrs, col_attrs, attrs = loom_file
    lp.create(filename=str(LOOM_PATH),
              layers=matrix,
              row_attrs=row_attrs,
              col_attrs=col_attrs,
              file_attrs=attrs)
    with lp.connect(LOOM_PATH, mode="r", validate=False) as ds:
        loom_under_test = Loom(LOOM_PATH, LOOM_PATH, ds, LOOM_FILE_HANDLER)
        assert loom_under_test.get_abs_file_path() == LOOM_PATH
def test_has_motif_and_track_regulons(loom_file):
    """The test loom should report both motif- and track-based regulons."""
    matrix, row_attrs, col_attrs, attrs = loom_file
    lp.create(filename=str(LOOM_PATH),
              layers=matrix,
              row_attrs=row_attrs,
              col_attrs=col_attrs,
              file_attrs=attrs)
    with lp.connect(LOOM_PATH, mode="r", validate=False) as ds:
        test_loom = Loom(LOOM_PATH, LOOM_PATH, ds, LOOM_FILE_HANDLER)
        # Fix: "== True" (flake8 E712) replaced by a direct truthiness assert.
        assert test_loom.has_motif_and_track_regulons()
def test_infer_species(loom_file):
    """Species inference falls back to ("Unknown", {}) for synthetic gene names."""
    matrix, row_attrs, col_attrs, attrs = loom_file
    lp.create(filename=str(LOOM_PATH),
              layers=matrix,
              row_attrs=row_attrs,
              col_attrs=col_attrs,
              file_attrs=attrs)
    with lp.connect(LOOM_PATH, mode="r", validate=False) as ds:
        loom_under_test = Loom(LOOM_PATH, LOOM_PATH, ds, LOOM_FILE_HANDLER)
        species, mapping = loom_under_test.infer_species()
        assert (species, mapping) == ("Unknown", {})
def test_get_global_attribute_by_name(loom_file):
    """Global loom attributes are retrievable by name."""
    matrix, row_attrs, col_attrs, attrs = loom_file
    lp.create(filename=str(LOOM_PATH),
              layers=matrix,
              row_attrs=row_attrs,
              col_attrs=col_attrs,
              file_attrs=attrs)
    with lp.connect(LOOM_PATH, mode="r", validate=False) as ds:
        loom_under_test = Loom(LOOM_PATH, LOOM_PATH, ds, LOOM_FILE_HANDLER)
        genome = loom_under_test.get_global_attribute_by_name("Genome")
        assert genome == "Nomen dubium"
def test_get_cell_ids(loom_file):
    """Cell identifiers follow the "Cell_1".."Cell_N" naming of the fixture."""
    matrix, row_attrs, col_attrs, attrs = loom_file
    lp.create(filename=str(LOOM_PATH),
              layers=matrix,
              row_attrs=row_attrs,
              col_attrs=col_attrs,
              file_attrs=attrs)
    with lp.connect(LOOM_PATH, mode="r", validate=False) as ds:
        loom_under_test = Loom(LOOM_PATH, LOOM_PATH, ds, LOOM_FILE_HANDLER)
        num_cells = ds.shape[1]
        expected_ids = np.array(
            [f"Cell_{n}" for n in range(1, num_cells + 1)])
        assert (loom_under_test.get_cell_ids() == expected_ids).all()
def test_get_meta_data_cluster_by_clustering_id_and_cluster_id(loom_file):
    """Cluster metadata lookup by (clustering id, cluster id) pair."""
    matrix, row_attrs, col_attrs, attrs = loom_file
    lp.create(filename=str(LOOM_PATH),
              layers=matrix,
              row_attrs=row_attrs,
              col_attrs=col_attrs,
              file_attrs=attrs)
    with lp.connect(LOOM_PATH, mode="r", validate=False) as ds:
        loom_under_test = Loom(LOOM_PATH, LOOM_PATH, ds, LOOM_FILE_HANDLER)
        cluster = loom_under_test.get_meta_data_cluster_by_clustering_id_and_cluster_id(
            0, 0)
        assert cluster["description"] == "Unannotated Cluster 1"
def test_get_gene_expression(loom_file):
    """Gene expression lookup, with and without log transformation.

    Fix: renamed from ``get_gene_expression`` — without the ``test_`` prefix
    pytest never collected this function, so the test silently did not run.
    """
    matrix, row_attrs, col_attrs, attrs = loom_file
    lp.create(filename=str(LOOM_PATH),
              layers=matrix,
              row_attrs=row_attrs,
              col_attrs=col_attrs,
              file_attrs=attrs)
    with lp.connect(LOOM_PATH, mode="r", validate=False) as ds:
        test_loom = Loom(LOOM_PATH, LOOM_PATH, ds, LOOM_FILE_HANDLER)
        # log_transform=True applies log1p to the raw expression row.
        np.testing.assert_equal(
            test_loom.get_gene_expression("Gene_1", True, False),
            np.log1p(matrix[0]))
        # log_transform=False returns the raw row unchanged.
        np.testing.assert_equal(
            test_loom.get_gene_expression("Gene_100", False, False),
            matrix[99])
def label_all_clusters(loom: Loom, embedding: int, feature: str) -> List[FeatureLabel]:
    """
    Extract and group cells based on clustering.

    Place labels for each cluster at the barycentre of the cluster. Uses the
    clustering's custom ``clusterColors`` when present and sufficient,
    otherwise falls back to ``constant.BIG_COLOR_LIST``.
    """
    meta_data = loom.get_meta_data()
    for clustering in meta_data["clusterings"]:
        if clustering["name"] == re.sub("^Clustering: ", "", feature):
            clustering_id = str(clustering["id"])

    cluster_names_dict = loom.get_cluster_names(int(clustering_id))
    label_set = set()
    md_clustering = loom.get_meta_data_clustering_by_id(int(clustering_id))
    # Custom colours may carry a leading "#"; strip it for the viewer.
    colour_list = ([
        color[1:] if color.startswith("#") else color
        for color in md_clustering["clusterColors"]
    ] if "clusterColors" in md_clustering else constant.BIG_COLOR_LIST)
    if len(cluster_names_dict) > len(colour_list):
        # Fix: dropped the redundant ".keys()" and the pointless f-string
        # prefix (the message has no placeholders).
        logger.warning(
            "Not enough custom colors defined. Falling back to BIG_COLOR_LIST"
        )
        colour_list = constant.BIG_COLOR_LIST
    for i in uniq(loom.get_clustering_by_id(int(clustering_id))):
        if i == -1:
            # Sentinel colour for cells that belong to no cluster.
            label_set.add((i, "Unclustered", "XX" * 3))
            continue
        label_set.add(
            (i, cluster_names_dict[i], colour_list[i % len(colour_list)]))
    cluster_ids, clusters, colours = zip(*label_set)

    def labels() -> Generator[FeatureLabel, None, None]:
        for i, cluster in enumerate(clusters):
            coords = loom.get_coordinates(coordinatesID=embedding,
                                          cluster_info=(int(clustering_id),
                                                        int(cluster_ids[i])))
            # Label sits at the barycentre of the cluster's cells.
            yield FeatureLabel(
                label=cluster,
                colour=colours[i],
                coordinate=Coordinate(x=np.mean(coords["x"]),
                                      y=np.mean(coords["y"])),
            )

    return [label for label in labels()]
def label_annotation(loom: Loom, embedding: int, feature: str) -> List[FeatureLabel]:
    """
    Extract and group cells based on annotation.

    Place labels for each annotation at the barycentre of the cell cluster.
    """
    md_annotation = loom.get_meta_data_annotation_by_name(name=feature)
    values = md_annotation["values"]
    custom_colours = (md_annotation["colors"]
                      if "colors" in md_annotation else None)
    colours = to_colours(range(len(values)), color_list=custom_colours)

    def labels() -> Generator[FeatureLabel, None, None]:
        for idx, annotation_value in enumerate(values):
            coords = loom.get_coordinates(
                coordinatesID=embedding,
                annotation=[Annotation(name=feature,
                                       values=[annotation_value])])
            # Label sits at the barycentre of the annotated cells.
            yield FeatureLabel(
                label=annotation_value,
                colour=colours[idx],
                coordinate=Coordinate(x=np.mean(coords["x"]),
                                      y=np.mean(coords["y"])),
            )

    return list(labels())
def add_loom(self, file_path: Path, abs_file_path: Path, loom_connection) -> Loom:
    """Wrap an open loom connection in a Loom object and register it as active.

    The new Loom is stored in ``self.active_looms`` keyed by its absolute
    path, then returned to the caller.
    """
    new_loom = Loom(
        file_path=file_path,
        abs_file_path=abs_file_path,
        loom_connection=loom_connection,
        loom_file_handler=self,
    )
    self.active_looms[abs_file_path] = new_loom
    return new_loom
def add_loom(self, partial_md5_hash: str, file_path: str, abs_file_path: str, loom_connection):
    """Wrap an open loom connection in a Loom object and register it as active.

    The new Loom carries the file's partial MD5 hash and is stored in
    ``self.active_looms`` keyed by its absolute path, then returned.
    """
    registered_loom = Loom(
        partial_md5_hash=partial_md5_hash,
        file_path=file_path,
        abs_file_path=abs_file_path,
        loom_connection=loom_connection,
        loom_file_handler=self,
    )
    self.active_looms[abs_file_path] = registered_loom
    return registered_loom
def get_final_feature_and_type(
    loom: Loom, aggregated_matches: Dict[ResultTypePair, List[str]]
) -> Tuple[Dict[ResultTypePair, str], Dict[ResultTypePair, str]]:
    """
    Determine final features and types.

    Build the lists needed to correctly associate each match with its final
    category.

    Args:
        loom (Loom): Loom object
        aggregated_matches (Dict[ResultTypePair, List[str]]): Aggregated matches from aggregate_matches

    Returns:
        Tuple[Dict[ResultTypePair, str], Dict[ResultTypePair, str]]: Features and Feature types
    """
    features: Dict[ResultTypePair, str] = {}
    feature_types: Dict[ResultTypePair, str] = {}
    for k in aggregated_matches:
        try:
            category = DEFINED_SEARCH_TYPES[k[1]]["final_category"]
        except KeyError:
            # Types without a defined mapping fall through unchanged.
            category = k[1]

        if category == "cluster_category":
            # Fix: split the "<clustering_id>_<cluster_id>" key once instead
            # of twice.
            key_parts = k[0].split("_")
            clustering_id = int(key_parts[0])
            cluster_id = int(key_parts[1])
            clustering_name = loom.get_meta_data_clustering_by_id(
                clustering_id)["name"]
            cluster = loom.get_meta_data_cluster_by_clustering_id_and_cluster_id(
                clustering_id, cluster_id)
            features[k] = cluster["description"]
            feature_types[k] = f"Clustering: {clustering_name}"
        else:
            features[k] = k[0]
            feature_types[k] = category
    return features, feature_types
def downloadSubLoom(self, request, context):
    """
    gRPC streaming handler: build a sub-loom containing only the requested
    cells and stream progress replies, ending with the new file's path and
    size.

    Cells are selected either by cluster membership
    (``request.featureType == "clusterings"``) or by an explicit index list
    (``request.featureType == "cellSelection"``); any other feature type is
    logged as an error and the stream ends without a final reply.
    """
    start_time = time.time()
    loom = self.lfh.get_loom(loom_file_path=Path(request.loomFilePath))
    loom_connection = loom.get_connection()
    meta_data = loom.get_meta_data()
    file_name = request.loomFilePath
    # Check if not a public loom file
    if "/" in request.loomFilePath:
        # NOTE(review): takes the second path component and drops the
        # extension — assumes exactly one "/" in the path; confirm against
        # how loomFilePath is constructed upstream.
        loom_name = request.loomFilePath.split("/")
        file_name = loom_name[1].split(".")[0]
    if request.featureType == "clusterings":
        # Locate the requested clustering by name, then the cluster within it
        # by its human-readable description.
        a = list(
            filter(lambda x: x["name"] == request.featureName,
                   meta_data["clusterings"]))
        b = list(
            filter(lambda x: x["description"] == request.featureValue,
                   a[0]["clusters"]))[0]
        # Boolean mask over columns: cells belonging to the chosen cluster.
        cells = loom_connection.ca["Clusterings"][str(
            a[0]["id"])] == b["id"]
        logger.debug("Number of cells in {0}: {1}".format(
            request.featureValue, np.sum(cells)))
        sub_loom_file_name = file_name + "_Sub_" + request.featureValue.replace(
            " ", "_").replace("/", "_")
    elif request.featureType == "cellSelection":
        # Build the mask directly from the explicit cell index list.
        cells = np.full(loom.get_nb_cells(), False)
        cells[request.cellIndices] = True
        logger.debug(
            f"Number of cells in selection: {len(request.cellIndices)}")
        sub_loom_file_name = (
            f"{file_name}_CellSelection_{request.featureValue}_{datetime.datetime.now().strftime('%y%m%d_%H%M')}"
        )
    else:
        logger.error("This feature is currently not implemented.")
        return
    # Ensure the tmp output directory exists under the Loom data dir.
    if not os.path.exists(
            os.path.join(self.dfh.get_data_dirs()["Loom"]["path"], "tmp")):
        os.mkdir(
            os.path.join(self.dfh.get_data_dirs()["Loom"]["path"], "tmp"))
    sub_loom_file_path = os.path.join(
        self.dfh.get_data_dirs()["Loom"]["path"], "tmp",
        sub_loom_file_name + ".loom")
    # Check if the file already exists
    if os.path.exists(path=sub_loom_file_path):
        os.remove(path=sub_loom_file_path)
    # Create new file attributes
    sub_loom_file_attrs = dict()
    sub_loom_file_attrs["title"] = sub_loom_file_name
    sub_loom_file_attrs["CreationDate"] = timestamp()
    sub_loom_file_attrs["LOOM_SPEC_VERSION"] = _version.__version__
    if "title" in loom_connection.attrs:
        sub_loom_file_attrs[
            "note"] = f"This loom is a subset of {Loom.clean_file_attr(file_attr=loom_connection.attrs['title'])} loom file"
    else:
        sub_loom_file_attrs[
            "note"] = f"This loom is a subset of {request.loomFilePath} loom file"
    sub_loom_file_attrs["MetaData"] = Loom.clean_file_attr(
        file_attr=loom_connection.attrs["MetaData"])
    # - Use scan to subset cells (much faster than naive subsetting): avoid to load everything into memory
    # - Loompy bug: loompy.create_append works but generate a file much bigger than its parent
    #   So prepare all the data and create the loom afterwards
    logger.debug("Subsetting {0} cluster from the active .loom...".format(
        request.featureValue))
    processed = 0
    tot_cells = loom.get_nb_cells()
    # Early progress ping so the client can show the operation has started.
    yield s_pb2.DownloadSubLoomReply(
        loomFilePath="",
        loomFileSize=0,
        progress=s_pb2.Progress(value=0.01,
                                status="Sub Loom creation started!"),
        isDone=False,
    )
    sub_matrices = []
    for (idx, _, view) in loom_connection.scan(items=cells,
                                               axis=1,
                                               batch_size=5120):
        sub_matrices.append(view[:, :])
        # Send the progress
        processed = idx / tot_cells
        yield s_pb2.DownloadSubLoomReply(
            loomFilePath="",
            loomFileSize=0,
            progress=s_pb2.Progress(value=processed,
                                    status="Sub Loom Created!"),
            isDone=False,
        )
    yield s_pb2.DownloadSubLoomReply(
        loomFilePath="",
        loomFileSize=0,
        progress=s_pb2.Progress(value=0.99, status="Sub Loom Created!"),
        isDone=False,
    )
    # Stitch the scanned batches back together along the cell axis.
    sub_matrix = np.concatenate(sub_matrices, axis=1)
    logger.debug("Creating {0} sub .loom...".format(request.featureValue))
    lp.create(
        sub_loom_file_path,
        sub_matrix,
        row_attrs=loom_connection.ra,
        col_attrs=loom_connection.ca[cells],
        file_attrs=sub_loom_file_attrs,
    )
    # Free the concatenated matrix before reading the file size.
    del sub_matrix
    # NOTE(review): opens a binary .loom in text mode; fstat only needs the
    # descriptor so this works, but "rb" (or os.path.getsize) would be safer
    # — confirm before changing.
    with open(sub_loom_file_path, "r") as fh:
        loom_file_size = os.fstat(fh.fileno())[6]
    logger.debug("{0:.5f} seconds elapsed making loom ---".format(time.time() -
                                                                  start_time))
    yield s_pb2.DownloadSubLoomReply(
        loomFilePath=sub_loom_file_path,
        loomFileSize=loom_file_size,
        progress=s_pb2.Progress(value=1.0, status="Sub Loom Created!"),
        isDone=True,
    )