Esempio n. 1
0
    def _merge_rna(paths, metadata, save_dir, id_col="lane_id", parallel=True):
        """Load several count matrices and merge them into a single one.

        Every entry of ``paths`` is read with ``Counts.from_cellranger`` and the
        resulting matrices are joined with ``Counts.concatenate``. When
        ``metadata`` is given, its n-th row is repeated once for every cell of
        the n-th matrix, so the returned table has one row per cell.

        Args:
            paths: Iterable of locations accepted by ``Counts.from_cellranger``.
            metadata: Optional ``pandas.DataFrame`` with one row per path, in the
                same order as ``paths``. Columns whose name starts with
                ``"path_"`` are dropped. ``None`` skips metadata handling.
            save_dir: Output directory (``str`` or path-like). When truthy it is
                created if needed and ``meta.tsv`` and ``rna.pickle`` are written
                into it. A falsy value disables saving.
            id_col: Name of the metadata column used to disambiguate cell
                identifiers: the last character of every identifier is replaced
                by the column value of the matrix the cell came from. Ignored
                when the column is absent or ``metadata`` is ``None``.
            parallel: Load the matrices through ``Parallel(n_jobs=-2)`` when
                true, sequentially otherwise.

        Returns:
            Tuple ``(rna, meta)``: the concatenated counts and a
            ``pandas.DataFrame`` indexed by ``rna.cell_ids`` (without columns
            when no metadata was supplied).

        Raises:
            ValueError: If the matrices differ in their number of columns, if
                ``metadata`` does not have exactly one row per loaded matrix, or
                if the merged cell identifiers are not unique.
        """
        # Local import keeps this block self-contained; lets save_dir be a str.
        from pathlib import Path

        # paths is walked twice (loading + error message), so materialise it
        # once in case the caller handed over a one-shot iterator.
        paths = list(paths)

        # TODO: significant memory leakage -- maybe make an optional kwarg
        if parallel:
            pool = Parallel(n_jobs=-2)
            rna_list = pool(
                delayed(Counts.from_cellranger)(path) for path in paths)
        else:
            rna_list = [Counts.from_cellranger(path) for path in paths]

        widths = [counts.shape[1] for counts in rna_list]
        if len(set(widths)) > 1:
            raise ValueError(
                f"Can't merge matrices with mixed shapes: {set(widths)}. Details: {list(zip(paths, widths))}"
            )
        rna = Counts.concatenate(rna_list)

        meta = None
        if metadata is not None:
            if len(metadata) != len(rna_list):
                raise ValueError(
                    f"metadata has {len(metadata)} rows but {len(rna_list)} matrices were loaded; "
                    "expected exactly one metadata row per path")
            metadata_cols = [
                col for col in metadata.columns if not col.startswith("path_")
            ]
            # Switch to a positional index first: repeating *labels* of a
            # non-unique index would select more rows than intended.
            metadata = metadata[metadata_cols].reset_index(drop=True)
            cells_per_matrix = [counts.shape[0] for counts in rna_list]
            meta = metadata.loc[metadata.index.repeat(
                cells_per_matrix)].reset_index(drop=True)
            if id_col in metadata:
                # Swap the last character of each identifier for the id_col
                # value (presumably the trailing digit of a Cell Ranger
                # barcode suffix -- confirm against Counts.from_cellranger).
                rna.index = rna.index.str.slice(0,
                                                -1) + meta[id_col].astype(str)

        if rna.index.duplicated().any():
            raise ValueError(
                "cell identifiers must be unique. Consider using metadata with `lane_id` column or specify a custom "
                "`id_col`")

        if meta is None:
            # No metadata: still return a (column-less) table keyed by cell id.
            meta = pd.DataFrame(index=rna.cell_ids)
            meta.index.name = None
        else:
            meta.index = rna.cell_ids

        if save_dir:
            save_dir = Path(save_dir)
            os.makedirs(save_dir, exist_ok=True)

            meta.to_csv(save_dir / "meta.tsv", sep="\t")
            # TODO: move create_rds val to config
            rna.save(save_dir / "rna.pickle", save_rds=True)
        return rna, meta
Esempio n. 2
0
 def _merge_rna(paths, metadata, save_dir):
     """Load several count matrices and merge them into a single one.

     Every entry of ``paths`` is read with ``Counts.from_cellranger`` and the
     resulting matrices are joined with ``Counts.concatenate``. When
     ``metadata`` is given, its n-th row is repeated once for every cell of
     the n-th matrix, so the metadata table has one row per cell.

     Args:
         paths: Iterable of locations accepted by ``Counts.from_cellranger``.
         metadata: Optional ``pandas.DataFrame`` with one row per path, in the
             same order as ``paths``. Columns whose name starts with
             ``"path_"`` are dropped. ``None`` skips metadata handling.
         save_dir: Output directory (``str`` or path-like). When truthy it is
             created if needed and ``meta.tsv`` and ``rna.pickle`` are written
             into it. A falsy value disables saving.

     Returns:
         Tuple ``(rna, meta)``. ``meta`` is the per-cell metadata table indexed
         by ``rna.cell_ids``, or ``rna.cell_ids`` itself when ``metadata`` is
         ``None``.

     Raises:
         ValueError: If ``metadata`` does not have exactly one row per loaded
             matrix.
     """
     # Local imports keep this block self-contained.
     from pathlib import Path

     import pandas as pd

     rna_list = [Counts.from_cellranger(dir_) for dir_ in paths]
     meta = None
     if metadata is not None:
         if len(metadata) != len(rna_list):
             raise ValueError(
                 f"metadata has {len(metadata)} rows but {len(rna_list)} matrices were loaded; "
                 "expected exactly one metadata row per path")
         metadata_cols = [
             col for col in metadata.columns if not col.startswith("path_")
         ]
         # Switch to a positional index first: repeating *labels* of a
         # non-unique index would select more rows than intended.
         metadata = metadata[metadata_cols].reset_index(drop=True)
         cells_per_matrix = [counts.shape[0] for counts in rna_list]
         meta = metadata.loc[metadata.index.repeat(
             cells_per_matrix)].reset_index(drop=True)
     rna = Counts.concatenate(rna_list)
     if meta is not None:
         meta.index = rna.cell_ids
     else:
         # Kept as-is for callers: without metadata the bare ids are returned.
         meta = rna.cell_ids
     if save_dir:
         save_dir = Path(save_dir)  # allow plain strings as well as Path objects
         os.makedirs(save_dir, exist_ok=True)
         # The bare cell ids may be an index-like object without `to_csv`;
         # wrap them in a column-less table only for writing in that case.
         meta_table = meta if hasattr(meta, "to_csv") else pd.DataFrame(index=meta)
         meta_table.to_csv(save_dir / "meta.tsv", sep="\t")
         # TODO: move create_rds val to config
         rna.save(save_dir / "rna.pickle", create_rds=True)
     return rna, meta