def write_loom_file(data: MultimodalData, output_file: str) -> None:
    """Write a single-modality MultimodalData object to a loom file.

    The only modality in ``data`` is selected, and all of its matrices are
    written as loom layers. The matrix stored under key ``"X"`` becomes the
    main (unnamed) layer. Each matrix is transposed before writing.

    Parameters
    ----------
    data: ``MultimodalData``
        Data to write. Must contain exactly one modality.
    output_file: ``str``
        Path of the loom file to create.

    Raises
    ------
    ValueError
        If ``data`` contains no modality or more than one modality, or if the
        selected modality has no matrix at all.
    AssertionError
        If the selected modality has matrices but none is keyed ``"X"``.
    """
    keys = data.list_data()
    if len(keys) == 0:
        # Fail with a clear message instead of an IndexError on keys[0].
        raise ValueError("Data contain no modality to write!")
    if len(keys) > 1:
        raise ValueError(
            f"Data contain multiple modalities: {','.join(keys)}!")
    data.select_data(keys[0])

    matrices = data.list_keys()
    # The emptiness check must come before the "X" assertion; otherwise an
    # empty key list trips the assertion and this error is never reached.
    if len(matrices) == 0:
        raise ValueError("Could not write empty matrix to a loom file!")
    assert "X" in matrices

    def _process_attrs(key_name: str, attrs: pd.DataFrame,
                       attrs_multi: dict) -> Dict[str, object]:
        """Build a loom attribute dict from a data frame and a dict of arrays.

        The data frame index is stored under ``key_name`` and every column
        under its own name. Entries of ``attrs_multi`` with more than one
        dimension are added as well; an entry whose second dimension has
        length 1 is reduced to its first column.
        """
        res_dict = {key_name: attrs.index.values}
        for key in attrs.columns:
            res_dict[key] = np.array(attrs[key].values)
        for key, value in attrs_multi.items():
            # value.ndim == 1 refers to np.recarray, which will not be
            # written to a loom file.
            if value.ndim > 1:
                res_dict[key] = value if value.shape[1] > 1 else value[:, 0]
        return res_dict

    row_attrs = _process_attrs("Gene", data.var, data.varm)
    col_attrs = _process_attrs("CellID", data.obs, data.obsm)

    # Expose the feature accession under the "Accession" row attribute,
    # preferring "featureid" over "gene_ids" when both are present.
    for accession_key in ("featureid", "gene_ids"):
        if accession_key in row_attrs:
            row_attrs["Accession"] = row_attrs.pop(accession_key)
            break

    # The "X" matrix is stored under the empty layer name; every matrix is
    # transposed before being handed to loompy.
    layers = {
        "" if matkey == "X" else matkey: data.get_matrix(matkey).T
        for matkey in matrices
    }

    # Only string-valued entries of data.uns are kept as file attributes.
    file_attrs = {
        key: value for key, value in data.uns.items()
        if isinstance(value, str)
    }

    # Imported here so loompy is only required when a loom file is written.
    import loompy
    loompy.create(output_file, layers, row_attrs, col_attrs,
                  file_attrs=file_attrs)

    logger.info(f"{output_file} is written.")
def log_norm(
    data: MultimodalData,
    norm_count: float = 1e5,
    backup_matrix: str = "raw.X",
) -> None:
    """Normalize counts per cell, then apply the natural logarithm.

    Parameters
    ----------
    data: ``pegasusio.MultimodalData``
        Use current selected modality in data, which should contain one RNA
        expression matrix.

    norm_count: ``float``, optional, default: ``1e5``.
        Total counts of one cell after normalization.

    backup_matrix: ``str``, optional, default: ``raw.X``.
        The key name of the backup count matrix, usually the raw counts.

    Returns
    -------
    ``None``

    Update ``data.X`` with count matrix after log-normalization. In addition,
    back up the original count matrix as ``backup_matrix``.

    If ``backup_matrix`` already exists (i.e. normalization is being rerun),
    the backup is used as the source instead of ``data.X``.

    The value returned by ``normalize_by_count`` is stored in
    ``data.obs["scale"]``, and ``norm_count`` is recorded in
    ``data.uns["norm_count"]``.

    Examples
    --------
    >>> pg.log_norm(data)
    """
    if isinstance(data, MultimodalData):
        data = data.current_data()

    assert data.get_modality() == "rna"

    already_backed_up = backup_matrix in data.list_keys()
    if already_backed_up:
        # Rerun: start again from the backed-up counts.
        source_counts = data.get_matrix(backup_matrix)
    else:
        # First run: keep the current matrix as the backup.
        data.add_matrix(backup_matrix, data.X)
        source_counts = data.X

    # astype() forces a copy, so the backup matrix is left untouched.
    data.X = source_counts.astype(np.float32)

    if already_backed_up:
        logger.warning(
            "Rerun log-normalization. Use the raw counts in backup instead.")

    expr_matrix = data.X
    robust_flags = data.var["robust"].values
    data.obs["scale"] = normalize_by_count(
        expr_matrix, robust_flags, norm_count, True)
    data.uns["norm_count"] = norm_count