def diffexp(branch: "CellBranch", run_name: str):
    """Launch the differential-expression script for ``run_name``.

    The two identities to compare are the unique values of the
    ``partition_code`` metadata column (cast to object dtype): the minimum
    is passed as the first identity and the maximum as the second.

    Args:
        branch: Branch providing the spec, metadata, root and per-run paths.
        run_name: Key used to look up the run's spec and process run.

    Raises:
        ValueError: If ``partition_code`` does not contain exactly two
            unique values.
    """
    # TODO: refactor both diffexp versions into `_get_diffexp_args`
    groupby = "partition_code"

    metadata_path = branch.get_temp_meta_path(run_name)
    params = branch.spec[run_name].params
    result_path = branch[run_name].path_map["diffexp_result"]
    root_dir = str(branch.root)
    spec_str = branch.spec.shell_str

    groups = branch[run_name].branch.meta[groupby].unique().astype("O")
    if len(groups) != 2:
        raise ValueError(
            f"Exactly two groups required for diffexp. Got: {groups}")
    ident1, ident2 = groups.min(), groups.max()

    # Positional order is the contract with the script; do not reorder.
    script_args = [
        metadata_path,
        result_path,
        root_dir,
        spec_str,
        params["test"],
        params["logfc_thresh"],
        ident1,
        ident2,
        groupby,
        N_CPUS,
        # R_FUNCTIONS_FILEPATH,
    ]
    run_r_script_logged(branch, R_DIFFEXP_SCRIPT, script_args, run_name)
def reduce(branch: "CellBranch", run_name: str):
    """Run the PCA script, then embed its output with ``run_umap``.

    The PCA step receives the metadata path, the prior ``rna_r`` path, the
    three PCA output paths and ``pca_npcs``. The object returned by
    ``run_umap`` is re-indexed with ``branch.meta.index`` and written as a
    tab-separated file to the run's ``meta`` path.

    Args:
        branch: Branch providing the spec, metadata and per-run paths.
        run_name: Key used to look up the run's spec and process run.
    """
    metadata_path = branch.get_temp_meta_path(run_name)
    params = branch.spec[run_name].params
    run = branch[run_name]

    rds_in = run.path_map_prior["rna_r"]
    embeddings_out = run.path_map["pca_embeddings"]
    loadings_out = run.path_map["pca_loadings"]
    stdev_out = run.path_map["pca_stdev"]
    n_pcs = params["pca_npcs"]

    # Positional order is the contract with the script; do not reorder.
    pca_args = [
        metadata_path,
        rds_in,
        embeddings_out,
        loadings_out,
        stdev_out,
        n_pcs,
        # r_functions_filepath,
    ]
    run_r_script_logged(branch, R_PCA_SCRIPT, pca_args, run_name)

    # UMAP settings are read only after the PCA step has finished.
    umap_settings = {
        "n_neighbors": params["umap_n_neighbors"],
        "min_dist": params["umap_min_dist"],
        "n_components": params["umap_n_components"],
        "metric": params["umap_metric"],
    }
    embedding = run_umap(embeddings_out, **umap_settings)

    # Label the embedding rows with the branch's metadata index.
    embedding.index = branch.meta.index
    embedding.to_csv(run.path_map["meta"], sep="\t")
def markers(branch: "CellBranch", run_name: str):
    """Launch the marker-gene script for ``run_name``.

    Before the script is started, every value of the ``cluster_id``
    metadata column must occur at least ``min_cells_per_cluster`` times.

    Args:
        branch: Branch providing the spec, metadata, root and per-run paths.
        run_name: Key used to look up the run's spec and process run.

    Raises:
        ValueError: If any cluster has fewer than ``min_cells_per_cluster``
            cells; the message lists the offending clusters and all counts.
    """
    # Single source of truth for both the check and the error message (the
    # message previously claimed "fewer than 3" while the check used 2).
    # NOTE(review): Seurat's FindMarkers defaults to min.cells.group = 3; if
    # the marker script relies on that default this may need to be 3 - confirm.
    min_cells_per_cluster = 2

    input_metadata_path = branch.get_temp_meta_path(run_name)
    params = branch.spec[run_name].params
    output_markers_path = branch[run_name].path
    root_dir = str(branch.root)
    spec_str = branch.spec.shell_str

    cluster_counts = branch.meta["cluster_id"].value_counts()
    too_small = cluster_counts < min_cells_per_cluster
    deficient_clusters = cluster_counts[too_small].index.tolist()
    if deficient_clusters:
        raise ValueError(
            f"Deficient clusters {deficient_clusters} with fewer than "
            f"{min_cells_per_cluster} cells. Adjust clustering parameters."
            f"\n{cluster_counts}")

    # Positional order is the contract with the script; do not reorder.
    arg_list = [
        input_metadata_path,
        output_markers_path,
        root_dir,
        spec_str,
        params["logfc_thresh"],
        params["test"],
        N_CPUS,
        # R_FUNCTIONS_FILEPATH,
    ]
    run_r_script_logged(branch, R_MARKERS_SCRIPT, arg_list, run_name)
def normalize(branch: "CellBranch", run_name: str):
    """Filter cells and genes, then normalize with the configured method.

    Steps carried out by the launched script (as stated by the original
    documentation of this function):
        - cell filtering by `min_genes`, `max_genes`, and `perc_mito_cutoff`
        - gene filtering by `min_cells` expressing
        - normalization via either seurat default or sctransform

    Params (read from the run's spec):
        min_genes (int):
        max_genes (int):
        min_cells (int):
        perc_mito_cutoff (int, float):
        method (str): from {"seurat_default", "sctransform"}
        nfeatures (int): (seurat_default only)

    Raises:
        ValueError: If `method` is neither "sctransform" nor
            "seurat_default".
    """
    metadata_path = branch.get_temp_meta_path(run_name)
    # TODO: add a root filepaths lookup
    params = branch.spec[run_name].params
    run = branch[run_name]

    # Arguments shared by both normalization scripts, in positional order.
    common_args = [
        metadata_path,
        run.path_map_prior["rna_r"],
        run.path_map["rna_r"],
        params["min_genes"],
        params["max_genes"],
        params["min_cells"],
        params["perc_mito_cutoff"],
        # R_FUNCTIONS_FILEPATH,
    ]
    method = params["method"]

    if method == "sctransform":
        script = R_SCTRANSFORM_SCRIPT
        method_args = [
            run.path_map["corrected_umi"],
            run.path_map["pearson_residual"],
        ]
    elif method == "seurat_default":
        script = R_SEURAT_DEFAULT_NORM_SCRIPT
        # Verbose flag is always on and passed as the upper-cased string.
        method_args = ["TRUE", params["nfeatures"]]
    else:
        raise ValueError(
            f"Invalid normalization method: {method}. Use 'sctransform' or 'seurat_default'"
        )

    run_r_script_logged(branch, script, common_args + method_args, run_name)
def cluster(branch: "CellBranch", run_name: str):
    """Launch the clustering script for ``run_name``.

    The script receives the metadata path, the run's ``meta`` output path,
    the branch root, the spec's shell string and the ``num_pcs``, ``res``
    and ``eps`` parameters, in that order.

    Args:
        branch: Branch providing the spec, root and per-run paths.
        run_name: Key used to look up the run's spec and process run.
    """
    metadata_path = branch.get_temp_meta_path(run_name)
    # TODO: add a root filepaths lookup
    params = branch.spec[run_name].params
    clusters_out = branch[run_name].path_map["meta"]
    # TODO: may actually need way to pass spec through
    root_dir = str(branch.root)
    spec_str = branch.spec.shell_str

    script_args = [metadata_path, clusters_out, root_dir, spec_str]
    script_args += [params["num_pcs"], params["res"], params["eps"]]
    # R_FUNCTIONS_FILEPATH is not appended (disabled in the argument list).
    run_r_script_logged(branch, R_CLUSTER_SCRIPT, script_args, run_name)
def plot_cell_cycle_scoring_scat(branch: "CellBranch", r_script: str,
                                 args: list, **kwargs):
    """Run ``r_script`` with ``args`` via ``run_r_script_logged``.

    The call is labelled ``"plot_cell_cycle_scoring_scat"``. Extra keyword
    arguments are accepted but not used.
    """
    label = "plot_cell_cycle_scoring_scat"
    run_r_script_logged(branch, r_script, args, label)
def plot_umap_embeddings_scat_r(branch: "CellBranch", r_script: str,
                                args: list, **kwargs):
    """Run ``r_script`` with ``args`` via ``run_r_script_logged``.

    The call is labelled ``"plot_umap_embeddings_scat"``. Extra keyword
    arguments are accepted but not used.
    """
    # NOTE(review): the label omits this function's `_r` suffix - confirm
    # that is intended before changing it.
    label = "plot_umap_embeddings_scat"
    run_r_script_logged(branch, r_script, args, label)
def plot_pca_loadings_scat(branch: "CellBranch", r_script: str, args: list,
                           **kwargs):
    """Run ``r_script`` with ``args`` via ``run_r_script_logged``.

    The call is labelled ``"plot_pca_loadings_scat"``. Extra keyword
    arguments are accepted but not used.
    """
    label = "plot_pca_loadings_scat"
    run_r_script_logged(branch, r_script, args, label)
def plot_pca_elbow_curv(branch: "CellBranch", r_script: str, args: list,
                        **kwargs):
    """Run ``r_script`` with ``args`` via ``run_r_script_logged``.

    The call is labelled ``"plot_pca_elbow_curv"``. Extra keyword arguments
    are accepted but not used.
    """
    label = "plot_pca_elbow_curv"
    run_r_script_logged(branch, r_script, args, label)
def plot_perc_hsp_per_cell_vln_r(branch: "CellBranch", r_script: str,
                                 args: list, **kwargs):
    """Run ``r_script`` with ``args`` via ``run_r_script_logged``.

    The call is labelled ``"plot_perc_hsp_per_cell_vln_r"``. Extra keyword
    arguments are accepted but not used.
    """
    # NOTE(review): unlike the other `*_r` plot wrappers in this file, the
    # label here keeps the `_r` suffix - confirm that is intended.
    label = "plot_perc_hsp_per_cell_vln_r"
    run_r_script_logged(branch, r_script, args, label)
def plot_umis_vs_perc_ribo_scat_r(branch: "CellBranch", r_script: str,
                                  args: list, **kwargs):
    """Run ``r_script`` with ``args`` via ``run_r_script_logged``.

    The call is labelled ``"plot_umis_vs_perc_ribo_scat"``. Extra keyword
    arguments are accepted but not used.
    """
    # NOTE(review): the label omits this function's `_r` suffix - confirm
    # that is intended before changing it.
    label = "plot_umis_vs_perc_ribo_scat"
    run_r_script_logged(branch, r_script, args, label)
def plot_marker_genes_per_cluster_bar(branch: "CellBranch", r_script: str,
                                      args: list, **kwargs):
    """Run ``r_script`` with ``args`` plus ``branch.current_path``.

    ``branch.current_path`` is passed as one extra trailing argument and the
    call is labelled ``"plot_marker_genes_per_cluster_bar"``. Extra keyword
    arguments are accepted but not used.

    Args:
        branch: Branch whose ``current_path`` is appended to the arguments.
        r_script: Script handed to ``run_r_script_logged``.
        args: Script arguments; this list is left unmodified.
    """
    # Build a new list rather than appending in place: mutating the caller's
    # list would add another path on every call that reuses the same list.
    script_args = [*args, branch.current_path]
    run_r_script_logged(branch, r_script, script_args,
                        "plot_marker_genes_per_cluster_bar")
def plot_umis_per_barcode_rank_curv(branch: "CellBranch", r_script: str,
                                    args: list, **kwargs):
    """Run ``r_script`` with ``args`` via ``run_r_script_logged``.

    Unlike the other plot wrappers in this file, the label is not a fixed
    string: it is read from ``branch.current_process`` at call time. Extra
    keyword arguments are accepted but not used.
    """
    label = branch.current_process
    run_r_script_logged(branch, r_script, args, label)
def plot_highest_exprs_dens(branch: "CellBranch", r_script: str, args: list,
                            **kwargs):
    """Run ``r_script`` with ``args`` via ``run_r_script_logged``.

    The call is labelled ``"plot_highest_exprs_dens"``. Extra keyword
    arguments are accepted but not used.
    """
    label = "plot_highest_exprs_dens"
    run_r_script_logged(branch, r_script, args, label)