コード例 #1
0
ファイル: test_loom.py プロジェクト: diedrikvanden/SCope
def test_get_coordinates(loom_file):
    """Check that ``Loom.get_coordinates(-1)`` returns the custom embedding.

    A synthetic two-column embedding is stored under the ``Embedding`` column
    attribute before the loom file is written. The test expects ``x`` to come
    back unchanged, ``y`` negated, and ``cellIndices`` to cover every cell.
    """
    layers, row_attributes, column_attributes, file_attributes = loom_file

    # POC for custom data in the loom file.
    num_cells = 100
    cells_per_group = int(num_cells / 4)
    # Four groups of x values centred on -2, -1, 0 and 1.
    x_groups = []
    for centre in range(-2, 2):
        x_groups.append(rg.normal(centre, 0.1, cells_per_group))
    x_coords = np.concatenate(x_groups)
    y_coords = rg.normal(0, 0.1, num_cells)

    embedding_frame = pd.DataFrame(columns=["_X", "_Y"])
    embedding_frame["_X"] = x_coords
    embedding_frame["_Y"] = y_coords
    column_attributes["Embedding"] = Loom.dfToNamedMatrix(embedding_frame)

    lp.create(
        filename=str(LOOM_PATH),
        layers=layers,
        row_attrs=row_attributes,
        col_attrs=column_attributes,
        file_attrs=file_attributes,
    )

    expected_coordinates = {
        "x": x_coords,
        "y": -y_coords,
        "cellIndices": list(range(num_cells)),
    }
    with lp.connect(LOOM_PATH, mode="r", validate=False) as connection:
        loom = Loom(LOOM_PATH, LOOM_PATH, connection, LOOM_FILE_HANDLER)
        np.testing.assert_equal(loom.get_coordinates(-1), expected_coordinates)
コード例 #2
0
def label_all_clusters(loom: Loom, embedding: int, feature: str) -> List[FeatureLabel]:
    """
    Extract and group cells based on clustering. Place labels for each cluster
    at the barycentre of the cluster.

    Args:
        loom: Loom object providing metadata, cluster assignments and coordinates.
        embedding: ID of the embedding whose coordinates are averaged.
        feature: Clustering name, optionally prefixed with "Clustering: ".

    Returns:
        One FeatureLabel per distinct cluster ID found in the clustering, in
        first-seen order. Cluster ID -1 is labelled "Unclustered". The list is
        empty when the clustering contains no cluster IDs.

    Raises:
        ValueError: If no clustering in the metadata matches ``feature``.
    """
    # Loop-invariant: strip the optional prefix once.
    clustering_name = re.sub("^Clustering: ", "", feature)

    clustering_id = None
    for clustering in loom.get_meta_data()["clusterings"]:
        if clustering["name"] == clustering_name:
            # Keep scanning: with duplicated names the last match wins.
            clustering_id = int(clustering["id"])
    if clustering_id is None:
        raise ValueError(f"No clustering named '{clustering_name}' found (feature: '{feature}')")
    cluster_names_dict = loom.get_cluster_names(clustering_id)

    # A dict keyed by cluster ID de-duplicates like a set would, but keeps a
    # deterministic (first-seen) order for the returned labels.
    label_info = {}
    for i in uniq(loom.get_clustering_by_id(clustering_id)):
        if i == -1:
            label_info[i] = ("Unclustered", "XX" * 3)
            continue
        label_info[i] = (
            cluster_names_dict[i],
            constant.BIG_COLOR_LIST[i % len(constant.BIG_COLOR_LIST)],
        )

    feature_labels = []
    for cluster_id, (cluster_name, colour) in label_info.items():
        coords = loom.get_coordinates(
            coordinatesID=embedding, cluster_info=(clustering_id, int(cluster_id))
        )
        feature_labels.append(
            FeatureLabel(
                label=cluster_name,
                colour=colour,
                # Barycentre: mean of the cluster's x and y coordinates.
                coordinate=Coordinate(x=np.mean(coords["x"]), y=np.mean(coords["y"])),
            )
        )
    return feature_labels
コード例 #3
0
ファイル: test_loom.py プロジェクト: diedrikvanden/SCope
def test_get_abs_file_path(loom_file):
    """Check that ``Loom.get_abs_file_path`` returns ``LOOM_PATH`` for a Loom built on it."""
    layers, row_attributes, column_attributes, file_attributes = loom_file
    lp.create(
        filename=str(LOOM_PATH),
        layers=layers,
        row_attrs=row_attributes,
        col_attrs=column_attributes,
        file_attrs=file_attributes,
    )
    with lp.connect(LOOM_PATH, mode="r", validate=False) as connection:
        loom = Loom(LOOM_PATH, LOOM_PATH, connection, LOOM_FILE_HANDLER)
        reported_path = loom.get_abs_file_path()
        assert reported_path == LOOM_PATH
コード例 #4
0
ファイル: test_loom.py プロジェクト: diedrikvanden/SCope
def test_has_motif_and_track_regulons(loom_file):
    """Check that the fixture loom reports having motif and track regulons."""
    matrix, row_attrs, col_attrs, attrs = loom_file
    lp.create(filename=str(LOOM_PATH),
              layers=matrix,
              row_attrs=row_attrs,
              col_attrs=col_attrs,
              file_attrs=attrs)
    with lp.connect(LOOM_PATH, mode="r", validate=False) as ds:
        test_loom = Loom(LOOM_PATH, LOOM_PATH, ds, LOOM_FILE_HANDLER)
        # Assert truthiness directly instead of comparing with `== True`.
        assert test_loom.has_motif_and_track_regulons()
コード例 #5
0
ファイル: test_loom.py プロジェクト: diedrikvanden/SCope
def test_infer_species(loom_file):
    """Check that ``Loom.infer_species`` returns ``("Unknown", {})`` for the fixture loom."""
    layers, row_attributes, column_attributes, file_attributes = loom_file
    lp.create(
        filename=str(LOOM_PATH),
        layers=layers,
        row_attrs=row_attributes,
        col_attrs=column_attributes,
        file_attrs=file_attributes,
    )
    with lp.connect(LOOM_PATH, mode="r", validate=False) as connection:
        loom = Loom(LOOM_PATH, LOOM_PATH, connection, LOOM_FILE_HANDLER)
        inferred = loom.infer_species()
        assert inferred == ("Unknown", {})
コード例 #6
0
ファイル: test_loom.py プロジェクト: diedrikvanden/SCope
def test_get_global_attribute_by_name(loom_file):
    """Check that the ``Genome`` global attribute reads back as ``"Nomen dubium"``."""
    layers, row_attributes, column_attributes, file_attributes = loom_file
    lp.create(
        filename=str(LOOM_PATH),
        layers=layers,
        row_attrs=row_attributes,
        col_attrs=column_attributes,
        file_attrs=file_attributes,
    )
    with lp.connect(LOOM_PATH, mode="r", validate=False) as connection:
        loom = Loom(LOOM_PATH, LOOM_PATH, connection, LOOM_FILE_HANDLER)
        genome = loom.get_global_attribute_by_name("Genome")
        assert genome == "Nomen dubium"
コード例 #7
0
ファイル: test_loom.py プロジェクト: diedrikvanden/SCope
def test_get_cell_ids(loom_file):
    """Check that ``Loom.get_cell_ids`` yields ``Cell_1`` .. ``Cell_<n>`` for all n columns."""
    layers, row_attributes, column_attributes, file_attributes = loom_file
    lp.create(
        filename=str(LOOM_PATH),
        layers=layers,
        row_attrs=row_attributes,
        col_attrs=column_attributes,
        file_attrs=file_attributes,
    )
    with lp.connect(LOOM_PATH, mode="r", validate=False) as connection:
        loom = Loom(LOOM_PATH, LOOM_PATH, connection, LOOM_FILE_HANDLER)
        # The number of cells is the second dimension of the connection's shape.
        num_cells = connection.shape[1]
        actual_ids = loom.get_cell_ids()
        expected_ids = np.array(
            [f"Cell_{n}" for n in range(1, num_cells + 1)])
        assert (actual_ids == expected_ids).all()
コード例 #8
0
ファイル: test_loom.py プロジェクト: diedrikvanden/SCope
def test_get_meta_data_cluster_by_clustering_id_and_cluster_id(loom_file):
    """Check the description of cluster 0 in clustering 0 of the fixture loom."""
    layers, row_attributes, column_attributes, file_attributes = loom_file
    lp.create(
        filename=str(LOOM_PATH),
        layers=layers,
        row_attrs=row_attributes,
        col_attrs=column_attributes,
        file_attrs=file_attributes,
    )
    with lp.connect(LOOM_PATH, mode="r", validate=False) as connection:
        loom = Loom(LOOM_PATH, LOOM_PATH, connection, LOOM_FILE_HANDLER)
        cluster_meta = loom.get_meta_data_cluster_by_clustering_id_and_cluster_id(
            0, 0)
        assert cluster_meta["description"] == "Unannotated Cluster 1"
コード例 #9
0
ファイル: test_loom.py プロジェクト: diedrikvanden/SCope
def get_gene_expression(loom_file):
    """Check ``Loom.get_gene_expression`` against rows of the fixture matrix.

    With the second argument ``True`` the result for ``Gene_1`` is expected to
    equal ``np.log1p`` of the first matrix row; with it ``False`` the result
    for ``Gene_100`` is expected to equal the 100th row unchanged.

    NOTE(review): the name lacks the ``test_`` prefix, so default pytest
    collection will not run this function -- confirm whether that is intended.
    """
    matrix, row_attributes, column_attributes, file_attributes = loom_file
    lp.create(
        filename=str(LOOM_PATH),
        layers=matrix,
        row_attrs=row_attributes,
        col_attrs=column_attributes,
        file_attrs=file_attributes,
    )
    with lp.connect(LOOM_PATH, mode="r", validate=False) as connection:
        loom = Loom(LOOM_PATH, LOOM_PATH, connection, LOOM_FILE_HANDLER)

        log_expression = loom.get_gene_expression("Gene_1", True, False)
        np.testing.assert_equal(log_expression, np.log1p(matrix[0]))

        raw_expression = loom.get_gene_expression("Gene_100", False, False)
        np.testing.assert_equal(raw_expression, matrix[99])
コード例 #10
0
def label_all_clusters(loom: Loom, embedding: int,
                       feature: str) -> List[FeatureLabel]:
    """
    Extract and group cells based on clustering. Place labels for each cluster
    at the barycentre of the cluster.

    Colours come from the clustering's "clusterColors" metadata (a leading "#"
    is stripped) when present; otherwise, or when fewer colours than named
    clusters are defined, constant.BIG_COLOR_LIST is used.

    Args:
        loom: Loom object providing metadata, cluster assignments and coordinates.
        embedding: ID of the embedding whose coordinates are averaged.
        feature: Clustering name, optionally prefixed with "Clustering: ".

    Returns:
        One FeatureLabel per distinct cluster ID found in the clustering, in
        first-seen order. Cluster ID -1 is labelled "Unclustered". The list is
        empty when the clustering contains no cluster IDs.

    Raises:
        ValueError: If no clustering in the metadata matches ``feature``.
    """
    # Loop-invariant: strip the optional prefix once.
    clustering_name = re.sub("^Clustering: ", "", feature)

    clustering_id = None
    for clustering in loom.get_meta_data()["clusterings"]:
        if clustering["name"] == clustering_name:
            # Keep scanning: with duplicated names the last match wins.
            clustering_id = int(clustering["id"])
    if clustering_id is None:
        raise ValueError(
            f"No clustering named '{clustering_name}' found (feature: '{feature}')"
        )
    cluster_names_dict = loom.get_cluster_names(clustering_id)

    md_clustering = loom.get_meta_data_clustering_by_id(clustering_id)
    if "clusterColors" in md_clustering:
        colour_list = [
            color[1:] if color.startswith("#") else color
            for color in md_clustering["clusterColors"]
        ]
    else:
        colour_list = constant.BIG_COLOR_LIST
    if len(cluster_names_dict) > len(colour_list):
        logger.warning(
            "Not enough custom colors defined. Falling back to BIG_COLOR_LIST")
        colour_list = constant.BIG_COLOR_LIST

    # A dict keyed by cluster ID de-duplicates like a set would, but keeps a
    # deterministic (first-seen) order for the returned labels.
    label_info = {}
    for i in uniq(loom.get_clustering_by_id(clustering_id)):
        if i == -1:
            label_info[i] = ("Unclustered", "XX" * 3)
            continue
        label_info[i] = (cluster_names_dict[i],
                         colour_list[i % len(colour_list)])

    feature_labels = []
    for cluster_id, (cluster_name, colour) in label_info.items():
        coords = loom.get_coordinates(coordinatesID=embedding,
                                      cluster_info=(clustering_id,
                                                    int(cluster_id)))
        feature_labels.append(
            FeatureLabel(
                label=cluster_name,
                colour=colour,
                # Barycentre: mean of the cluster's x and y coordinates.
                coordinate=Coordinate(x=np.mean(coords["x"]),
                                      y=np.mean(coords["y"])),
            ))
    return feature_labels
コード例 #11
0
def label_annotation(loom: Loom, embedding: int,
                     feature: str) -> List[FeatureLabel]:
    """
    Build one label per value of an annotation, positioned at the mean x/y
    coordinate of the cells returned for that annotation value.

    Colours are produced by ``to_colours``, which receives the annotation's
    "colors" metadata when that key is present and ``None`` otherwise.
    """
    annotation_meta = loom.get_meta_data_annotation_by_name(name=feature)
    annotation_values = annotation_meta["values"]
    value_indices = range(len(annotation_values))
    if "colors" in annotation_meta:
        custom_colours = annotation_meta["colors"]
    else:
        custom_colours = None
    palette = to_colours(value_indices, color_list=custom_colours)

    feature_labels = []
    for index, value in enumerate(annotation_values):
        # Restrict the coordinates to cells carrying this annotation value.
        selection = [Annotation(name=feature, values=[value])]
        coords = loom.get_coordinates(coordinatesID=embedding,
                                      annotation=selection)
        colour = palette[index]
        barycentre = Coordinate(x=np.mean(coords["x"]),
                                y=np.mean(coords["y"]))
        feature_labels.append(
            FeatureLabel(label=value, colour=colour, coordinate=barycentre))
    return feature_labels
コード例 #12
0
 def add_loom(self, file_path: Path, abs_file_path: Path,
              loom_connection) -> Loom:
     """Create a Loom bound to this handler and register it under ``abs_file_path``.

     The new Loom is stored in ``self.active_looms`` keyed by ``abs_file_path``
     and also returned to the caller.
     """
     loom_kwargs = dict(
         file_path=file_path,
         abs_file_path=abs_file_path,
         loom_connection=loom_connection,
         loom_file_handler=self,
     )
     new_loom = Loom(**loom_kwargs)
     self.active_looms[abs_file_path] = new_loom
     return new_loom
コード例 #13
0
 def add_loom(self, partial_md5_hash: str, file_path: str,
              abs_file_path: str, loom_connection):
     """Create a Loom bound to this handler and register it under ``abs_file_path``.

     The new Loom is stored in ``self.active_looms`` keyed by ``abs_file_path``
     and also returned to the caller.
     """
     loom_kwargs = dict(
         partial_md5_hash=partial_md5_hash,
         file_path=file_path,
         abs_file_path=abs_file_path,
         loom_connection=loom_connection,
         loom_file_handler=self,
     )
     new_loom = Loom(**loom_kwargs)
     self.active_looms[abs_file_path] = new_loom
     return new_loom
コード例 #14
0
def get_final_feature_and_type(
    loom: Loom, aggregated_matches: Dict[ResultTypePair, List[str]]
) -> Tuple[Dict[ResultTypePair, str], Dict[ResultTypePair, str]]:
    """
    Resolve the display feature and feature type for every aggregated match.

    Each key's second element is mapped to its "final_category" through
    DEFINED_SEARCH_TYPES, falling back to the element itself when no such
    entry exists. Keys in the "cluster_category" are expected to start with
    "<clustering id>_<cluster id>"; they resolve to the cluster description
    and a "Clustering: <name>" type. All other keys keep their first element
    as the feature and the category as the type.

    Args:
        loom (Loom): Loom object
        aggregated_matches (Dict[ResultTypePair, List[str]]): Aggregated matches from aggregate_matches

    Returns:
        Tuple[Dict[ResultTypePair, str], Dict[ResultTypePair, str]]: Features and Feature types
    """

    features: Dict[ResultTypePair, str] = {}
    feature_types: Dict[ResultTypePair, str] = {}

    for pair in aggregated_matches:
        try:
            category = DEFINED_SEARCH_TYPES[pair[1]]["final_category"]
        except KeyError:
            category = pair[1]

        if category != "cluster_category":
            features[pair] = pair[0]
            feature_types[pair] = category
            continue

        # Cluster matches encode both IDs in the first element of the key.
        id_parts = pair[0].split("_")
        clustering_id = int(id_parts[0])
        cluster_id = int(id_parts[1])
        clustering_meta = loom.get_meta_data_clustering_by_id(clustering_id)
        clustering_name = clustering_meta["name"]
        cluster_meta = loom.get_meta_data_cluster_by_clustering_id_and_cluster_id(
            clustering_id, cluster_id)
        features[pair] = cluster_meta["description"]
        feature_types[pair] = f"Clustering: {clustering_name}"

    return features, feature_types
コード例 #15
0
    def downloadSubLoom(self, request, context):
        """
        Create a sub .loom file for one cluster or a cell selection, streaming progress.

        The cells are selected either by cluster membership
        (``request.featureType == "clusterings"``) or by explicit indices
        (``"cellSelection"``). The matching columns are collected with
        ``loom_connection.scan`` and written to
        ``<Loom data dir>/tmp/<name>.loom``, replacing any existing file there.

        Args:
            request: Request carrying loomFilePath, featureType, featureName,
                featureValue and cellIndices.
            context: Unused in this method.

        Yields:
            s_pb2.DownloadSubLoomReply messages: an initial progress of 0.01,
            one message per scanned batch, a 0.99 progress message, and a final
            message with ``isDone=True`` carrying the new file's path and size.
            For any other feature type an error is logged and nothing is
            yielded.
        """
        start_time = time.time()

        loom = self.lfh.get_loom(loom_file_path=Path(request.loomFilePath))
        loom_connection = loom.get_connection()
        meta_data = loom.get_meta_data()

        file_name = request.loomFilePath
        # Check if not a public loom file
        if "/" in request.loomFilePath:
            # NOTE(review): only the second path component is used -- this
            # presumably assumes paths of the form "<dir>/<file>.loom"; confirm.
            loom_name = request.loomFilePath.split("/")
            file_name = loom_name[1].split(".")[0]

        if request.featureType == "clusterings":
            a = list(
                filter(lambda x: x["name"] == request.featureName,
                       meta_data["clusterings"]))
            b = list(
                filter(lambda x: x["description"] == request.featureValue,
                       a[0]["clusters"]))[0]
            # Boolean mask over cells: True where the cell belongs to cluster b.
            cells = loom_connection.ca["Clusterings"][str(
                a[0]["id"])] == b["id"]
            logger.debug("Number of cells in {0}: {1}".format(
                request.featureValue, np.sum(cells)))
            sub_loom_file_name = file_name + "_Sub_" + request.featureValue.replace(
                " ", "_").replace("/", "_")
        elif request.featureType == "cellSelection":
            cells = np.full(loom.get_nb_cells(), False)
            cells[request.cellIndices] = True
            logger.debug(
                f"Number of cells in selection: {len(request.cellIndices)}")
            sub_loom_file_name = (
                f"{file_name}_CellSelection_{request.featureValue}_{datetime.datetime.now().strftime('%y%m%d_%H%M')}"
            )
        else:
            logger.error("This feature is currently not implemented.")
            return

        # Build the tmp directory path once. exist_ok avoids the race between
        # an existence check and the directory creation.
        tmp_dir = os.path.join(self.dfh.get_data_dirs()["Loom"]["path"], "tmp")
        os.makedirs(tmp_dir, exist_ok=True)
        sub_loom_file_path = os.path.join(tmp_dir,
                                          sub_loom_file_name + ".loom")
        # Check if the file already exists
        if os.path.exists(path=sub_loom_file_path):
            os.remove(path=sub_loom_file_path)
        # Create new file attributes
        sub_loom_file_attrs = dict()
        sub_loom_file_attrs["title"] = sub_loom_file_name
        sub_loom_file_attrs["CreationDate"] = timestamp()
        sub_loom_file_attrs["LOOM_SPEC_VERSION"] = _version.__version__
        if "title" in loom_connection.attrs:
            sub_loom_file_attrs[
                "note"] = f"This loom is a subset of {Loom.clean_file_attr(file_attr=loom_connection.attrs['title'])} loom file"
        else:
            sub_loom_file_attrs[
                "note"] = f"This loom is a subset of {request.loomFilePath} loom file"
        sub_loom_file_attrs["MetaData"] = Loom.clean_file_attr(
            file_attr=loom_connection.attrs["MetaData"])
        # - Use scan to subset cells (much faster than naive subsetting): avoid to load everything into memory
        # - Loompy bug: loompy.create_append works but generate a file much bigger than its parent
        #      So prepare all the data and create the loom afterwards
        logger.debug("Subsetting {0} cluster from the active .loom...".format(
            request.featureValue))
        processed = 0
        tot_cells = loom.get_nb_cells()
        yield s_pb2.DownloadSubLoomReply(
            loomFilePath="",
            loomFileSize=0,
            progress=s_pb2.Progress(value=0.01,
                                    status="Sub Loom creation started!"),
            isDone=False,
        )
        sub_matrices = []
        for (idx, _, view) in loom_connection.scan(items=cells,
                                                   axis=1,
                                                   batch_size=5120):
            sub_matrices.append(view[:, :])
            # Send the progress
            processed = idx / tot_cells
            yield s_pb2.DownloadSubLoomReply(
                loomFilePath="",
                loomFileSize=0,
                progress=s_pb2.Progress(value=processed,
                                        status="Sub Loom Created!"),
                isDone=False,
            )
        yield s_pb2.DownloadSubLoomReply(
            loomFilePath="",
            loomFileSize=0,
            progress=s_pb2.Progress(value=0.99, status="Sub Loom Created!"),
            isDone=False,
        )
        sub_matrix = np.concatenate(sub_matrices, axis=1)
        logger.debug("Creating {0} sub .loom...".format(request.featureValue))
        lp.create(
            sub_loom_file_path,
            sub_matrix,
            row_attrs=loom_connection.ra,
            col_attrs=loom_connection.ca[cells],
            file_attrs=sub_loom_file_attrs,
        )
        # Drop the reference to the concatenated matrix once it is written.
        del sub_matrix
        # Stat the file directly instead of opening it just to read its size.
        loom_file_size = os.path.getsize(sub_loom_file_path)
        logger.debug(
            "{0:.5f} seconds elapsed making loom ---".format(time.time() -
                                                             start_time))

        yield s_pb2.DownloadSubLoomReply(
            loomFilePath=sub_loom_file_path,
            loomFileSize=loom_file_size,
            progress=s_pb2.Progress(value=1.0, status="Sub Loom Created!"),
            isDone=True,
        )