from crispy.CrispyPlot import CrispyPlot
from sklearn.mixture import GaussianMixture
from statsmodels.stats.multitest import multipletests
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from cancer_proteomics.notebooks import DataImport, PPI_PAL, PPI_ORDER


# Logger shared under the "cancer_proteomics" name.
LOG = logging.getLogger("cancer_proteomics")

# Package resource locations: data, PPI data, tables and plot output.
DPATH = pkg_resources.resource_filename("data", "/")
PPIPATH = pkg_resources.resource_filename("data", "ppi/")
TPATH = pkg_resources.resource_filename("tables", "/")
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")


# ### Imports

# Samplesheet
ss = DataImport.read_samplesheet()

# Proteomics matrix (Proteins x Cell lines)
prot = DataImport.read_protein_matrix(map_protein=True)

# Transcriptomics matrix
gexp = DataImport.read_gene_matrix()

# CRISPR matrix
crispr = DataImport.read_crispr_matrix()

# Protein-protein interactions table
ppis = pd.read_csv(f"{TPATH}/PPInteractions.csv.gz")

# Hits (annotated drug table)
lm_drug = pd.read_csv(f"{TPATH}/lm_sklearn_degr_drug_annotated.csv.gz")
import numpy as np
import pandas as pd
import pkg_resources
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from crispy import CrispyPlot
from cancer_proteomics.notebooks import DataImport


# Output location for plots.
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")


# ### Imports

# CMP samplesheet
cmp = DataImport.read_cmp_samplesheet()

# Short CCLE identifier: the text before the first underscore of CCLE_ID;
# entries whose string form is "nan" (case-insensitive) become NaN.
cmp["CCLE_ID_SHORT"] = cmp["CCLE_ID"].apply(
    lambda v: np.nan if str(v).lower() == 'nan' else v.split("_")[0]
)

# Proteomics
prot = DataImport.read_protein_matrix(map_protein=True)

# Proteomics BROAD (Proteins x Cell lines), with columns renamed from
# CCLE_ID to model_id using the CMP samplesheet.
prot_broad = DataImport.read_protein_matrix_broad()
prot_broad = prot_broad.rename(columns=cmp.set_index("CCLE_ID")["model_id"])

# Transcriptomics
gexp = DataImport.read_gene_matrix()

# CRISPR
crispr = DataImport.read_crispr_matrix()
from sklearn.mixture import GaussianMixture
from statsmodels.stats.multitest import multipletests
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from cancer_proteomics.notebooks import DataImport, two_vars_correlation, PALETTE_TTYPE
from crispy.DataImporter import CORUM, BioGRID, PPI, HuRI


LOG = logging.getLogger("cancer_proteomics")

DPATH = pkg_resources.resource_filename("data", "/")
PPIPATH = pkg_resources.resource_filename("data", "ppi/")
TPATH = pkg_resources.resource_filename("tables", "/")
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")


# ### Imports

# Read samplesheet
ss = DataImport.read_samplesheet()

# Build the plotting table: one row per sample, grouped by tissue and, within
# each tissue, ordered by decreasing replicates correlation.
plot_df = ss.sort_values(["tissue", "replicates_correlation"])

# Make "tissue" categorical with categories in the order returned by
# ss["tissue"].value_counts(), so the sort below follows that order instead of
# alphabetical order. The result is assigned back rather than using
# `inplace=True`: that argument of `.cat.set_categories` was deprecated in
# pandas 1.3 and removed in pandas 2.0.
plot_df["tissue"] = (
    plot_df["tissue"]
    .astype("category")
    .cat.set_categories(ss["tissue"].value_counts().index)
)

plot_df = plot_df.sort_values(
    ["tissue", "replicates_correlation"], ascending=[True, False]
).reset_index()

# One angular position per row, evenly spaced over a full turn, and the
# matching angular width of each slot.
theta = np.linspace(0.0, 2 * np.pi, plot_df.shape[0], endpoint=False)
width = (2 * np.pi) / plot_df.shape[0]

fig, ax = plt.subplots(
    1,
    1,
    figsize=(3, 3),
    dpi=600,
)

# NOTE(review): the logger and path constants below are re-assigned with the
# same values as above.
LOG = logging.getLogger("cancer_proteomics")

DPATH = pkg_resources.resource_filename("data", "/")
PPIPATH = pkg_resources.resource_filename("data", "ppi/")
TPATH = pkg_resources.resource_filename("tables", "/")
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")


# ### Import MOFA analysis
factors, weights, rsquare = MOFA.read_mofa_hdf5(f"{TPATH}/MultiOmics_broad.hdf5")


# ### Import manifest
manifest = DataImport.read_protein_perturbation_manifest()

# Remove MDA-MB-468
manifest = manifest[~manifest["External Patient ID"].isin(["MDA-MB-468"])]

# Remove low quality samples: first the rows matching both the cell line and
# the sample date (the date literal carries a trailing space), then two rows
# dropped by index label.
manifest = manifest[
    ~(
        (manifest["Cell Line"] == "BT-549 1% FBS")
        & (manifest["Date on sample"] == "4/7/19 ")
    )
]
manifest = manifest.drop(
    [
        "200627_b2-1-t5-1_00wuz_00yid_m03_s_1",
        "200623_b2-1-t4-1_00wuz_00yh3_m01_s_1",
    ]
)

# Remove low FBS levels: keep rows whose "Cell Line" does not contain "0.5%FBS"
manifest = manifest[["0.5%FBS" not in v for v in manifest["Cell Line"]]]
from adjustText import adjust_text
from crispy.Enrichment import Enrichment
from crispy.CrispyPlot import CrispyPlot
from sklearn.mixture import GaussianMixture
from statsmodels.stats.multitest import multipletests
from cancer_proteomics.notebooks import DataImport, two_vars_correlation


# Logger shared under the "cancer_proteomics" name.
LOG = logging.getLogger("cancer_proteomics")

# Package resource locations: data, tables and plot output.
DPATH = pkg_resources.resource_filename("data", "/")
TPATH = pkg_resources.resource_filename("tables", "/")
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")


# ### Imports

# Samplesheet
ss = DataImport.read_samplesheet()

# Proteomics matrix (Proteins x Cell lines)
prot = DataImport.read_protein_matrix(map_protein=True)

# Transcriptomics matrix
gexp = DataImport.read_gene_matrix()

# Copy number matrix
cnv = DataImport.read_copy_number()


# ### Overlaps

# Columns (samples) present in all three matrices.
samples = list(
    set.intersection(
        set(prot),
        set(gexp),
        set(cnv),
    )
)

# Index entries (genes) present in all three matrices.
genes = list(
    set.intersection(
        set(prot.index),
        set(gexp.index),
        set(cnv.index),
    )
)

LOG.info(f"Genes: {len(genes)}; Samples: {len(samples)}")
from crispy.GIPlot import GIPlot
from crispy.Enrichment import Enrichment
from crispy.CrispyPlot import CrispyPlot
from scipy.stats import spearmanr
from cancer_proteomics.notebooks import DataImport, DimReduction, two_vars_correlation


# Logger shared under the "cancer_proteomics" name.
LOG = logging.getLogger("cancer_proteomics")

# Package resource locations: data, tables and plot output.
DPATH = pkg_resources.resource_filename("data", "/")
TPATH = pkg_resources.resource_filename("tables", "/")
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")


# ### Imports

# Samplesheet
ss = DataImport.read_samplesheet()

# Proteomics matrix (Proteins x Cell lines)
prot = DataImport.read_protein_matrix(map_protein=True)

# Proteomics BROAD matrix (Proteins x Cell lines)
prot_broad = DataImport.read_protein_matrix_broad()

# Transcriptomics matrix
gexp = DataImport.read_gene_matrix()


# Overlaps
# samples = list(set.intersection(set(prot), set(gexp)))

# Index entries (genes) present in the proteomics, transcriptomics and BROAD
# proteomics matrices.
genes = list(
    set.intersection(
        set(prot.index),
        set(gexp.index),
        set(prot_broad.index),
    )
)
from crispy.LMModels import LMModels, LModel
from cancer_proteomics.notebooks import DataImport


# Logger shared under the "cancer_proteomics" name.
LOG = logging.getLogger("cancer_proteomics")

# Package resource locations: data, PPI data, tables and plot output.
DPATH = pkg_resources.resource_filename("data", "/")
PPIPATH = pkg_resources.resource_filename("data", "ppi/")
TPATH = pkg_resources.resource_filename("tables", "/")
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")


# ### Imports

# PPI (built from the PPI data directory with score_thres=900)
ppi = PPI(ddir=PPIPATH).build_string_ppi(score_thres=900)

# Samplesheet
ss = DataImport.read_samplesheet()

# Proteomics matrix (Proteins x Cell lines), read with min_measurements=300
prot = DataImport.read_protein_matrix(
    map_protein=True,
    min_measurements=300,
)

# Transcriptomics matrix
gexp = DataImport.read_gene_matrix()

# CRISPR matrix, plus the institute table restricted to the same columns
crispr = DataImport.read_crispr_matrix()
crispr_institute = DataImport.read_crispr_institute()[crispr.columns]

# `skew` applied along axis=1 of the CRISPR matrix; nan_policy="omit" is
# forwarded to `skew`, and the result is cast to float.
crispr_skew = crispr.apply(skew, axis=1, nan_policy="omit").astype(float)

# Drug-response matrix (read with min_measurements=300) and drug targets
drespo = DataImport.read_drug_response(min_measurements=300)
dtargets = DataImport.read_drug_target()
import pkg_resources
import matplotlib.pyplot as plt
from crispy.GIPlot import GIPlot
from crispy.MOFA import MOFA, MOFAPlot
from cancer_proteomics.notebooks import DataImport, two_vars_correlation, PALETTE_TTYPE


# Logger shared under the "cancer_proteomics" name.
LOG = logging.getLogger("cancer_proteomics")

# Package resource locations: data, PPI data, tables and plot output.
DPATH = pkg_resources.resource_filename("data", "/")
PPIPATH = pkg_resources.resource_filename("data", "ppi/")
TPATH = pkg_resources.resource_filename("tables", "/")
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")


# ### Imports

# Samplesheet
ss = DataImport.read_samplesheet()

# Proteomics matrix (Proteins x Cell lines), read with min_measurements=300
prot = DataImport.read_protein_matrix(
    map_protein=True,
    min_measurements=300,
)

# Proteomics BROAD matrix (Proteins x Cell lines)
prot_broad = DataImport.read_protein_matrix_broad()

# Transcriptomics matrix
gexp = DataImport.read_gene_matrix()

# CRISPR matrix
crispr = DataImport.read_crispr_matrix()

# Methylation matrix
methy = DataImport.read_methylation_matrix()
from adjustText import adjust_text
from matplotlib_venn import venn2, venn2_circles
from crispy.CrispyPlot import CrispyPlot
from sklearn.preprocessing import StandardScaler
from cancer_proteomics.notebooks import DataImport, two_vars_correlation, PALETTE_TTYPE


# Logger shared under the "cancer_proteomics" name.
LOG = logging.getLogger("cancer_proteomics")

# Package resource locations: data, PPI data, tables and plot output.
DPATH = pkg_resources.resource_filename("data", "/")
PPIPATH = pkg_resources.resource_filename("data", "ppi/")
TPATH = pkg_resources.resource_filename("tables", "/")
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")


# ### Imports

# Samplesheet
ss = DataImport.read_samplesheet()

# Proteomics matrix (Proteins x Cell lines) and the peptide raw mean
prot = DataImport.read_protein_matrix(map_protein=True)
peptide_raw_mean = DataImport.read_peptide_raw_mean()

# Transcriptomics matrix
gexp = DataImport.read_gene_matrix()

# CRISPR matrix
crispr = DataImport.read_crispr_matrix()

# Drug-response matrix and drug maximum concentrations
drespo = DataImport.read_drug_response()
dmaxc = DataImport.read_drug_max_concentration()