Beispiel #1
0
from crispy.CrispyPlot import CrispyPlot
from sklearn.mixture import GaussianMixture
from statsmodels.stats.multitest import multipletests
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from cancer_proteomics.notebooks import DataImport, PPI_PAL, PPI_ORDER

# Package logger plus resource directories resolved through pkg_resources.
# NOTE(review): `logging`, `pkg_resources` and `pd` are not imported in the
# visible part of this fragment; presumably imported above — confirm.
LOG = logging.getLogger("cancer_proteomics")
# Root and `ppi/` sub-directory of the `data` package
DPATH = pkg_resources.resource_filename("data", "/")
PPIPATH = pkg_resources.resource_filename("data", "ppi/")
# Root of the `tables` package
TPATH = pkg_resources.resource_filename("tables", "/")
# `plots/` directory inside the `cancer_proteomics` package
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")

# ### Imports
# Every loader below is a project helper on DataImport; the shape/contents of
# what it returns is not visible here.

# Read samplesheet
ss = DataImport.read_samplesheet()

# Read proteomics (Proteins x Cell lines)
# NOTE(review): map_protein=True presumably maps protein IDs to gene symbols —
# confirm in DataImport.read_protein_matrix.
prot = DataImport.read_protein_matrix(map_protein=True)

# Read Transcriptomics
gexp = DataImport.read_gene_matrix()

# Read CRISPR
crispr = DataImport.read_crispr_matrix()

# PPIs
# Gzipped CSV read from the tables directory
ppis = pd.read_csv(f"{TPATH}/PPInteractions.csv.gz")

# Hits
# Gzipped CSV of annotated drug association results (per the file name)
lm_drug = pd.read_csv(f"{TPATH}/lm_sklearn_degr_drug_annotated.csv.gz")
Beispiel #2
0
import numpy as np
import pandas as pd
import pkg_resources
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from crispy import CrispyPlot
from cancer_proteomics.notebooks import DataImport

# `plots/` directory inside the `cancer_proteomics` package
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")

# ### Imports

# CMP samplesheet, extended with a short CCLE identifier: the text before the
# first underscore of CCLE_ID, or NaN where CCLE_ID stringifies to "nan"
cmp = DataImport.read_cmp_samplesheet()
cmp["CCLE_ID_SHORT"] = cmp["CCLE_ID"].apply(
    lambda ccle_id: np.nan
    if str(ccle_id).lower() == "nan"
    else ccle_id.split("_")[0]
)

# Proteomics
prot = DataImport.read_protein_matrix(map_protein=True)

# Read proteomics BROAD (Proteins x Cell lines); columns labelled with a
# CCLE_ID are renamed to the matching CMP model_id
prot_broad = DataImport.read_protein_matrix_broad().rename(
    columns=cmp.set_index("CCLE_ID")["model_id"]
)

# Read Transcriptomics
gexp = DataImport.read_gene_matrix()

# Read CRISPR
crispr = DataImport.read_crispr_matrix()
from sklearn.mixture import GaussianMixture
from statsmodels.stats.multitest import multipletests
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from cancer_proteomics.notebooks import DataImport, two_vars_correlation, PALETTE_TTYPE
from crispy.DataImporter import CORUM, BioGRID, PPI, HuRI

# Package logger plus resource directories resolved through pkg_resources.
# NOTE(review): `logging` and `pkg_resources` are presumably imported above the
# visible part of this fragment — confirm.
LOG = logging.getLogger("cancer_proteomics")
# Root and `ppi/` sub-directory of the `data` package
DPATH = pkg_resources.resource_filename("data", "/")
PPIPATH = pkg_resources.resource_filename("data", "ppi/")
# Root of the `tables` package
TPATH = pkg_resources.resource_filename("tables", "/")
# `plots/` directory inside the `cancer_proteomics` package
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")

# ### Imports

# Read samplesheet
# The code below reads its "tissue" and "replicates_correlation" columns.
ss = DataImport.read_samplesheet()

# Order samples by tissue and, within each tissue, by decreasing replicate
# correlation. Tissues are ordered by how many samples they have
# (value_counts() sorts by descending count), not alphabetically.
plot_df = ss.sort_values(["tissue", "replicates_correlation"])

# `Series.cat.set_categories(..., inplace=True)` was deprecated in pandas 1.3
# and removed in pandas 2.0 (raises TypeError). Assigning the returned Series
# back is equivalent and works on every pandas version.
plot_df["tissue"] = (
    plot_df["tissue"]
    .astype("category")
    .cat.set_categories(ss["tissue"].value_counts().index)
)

# Sorting on the categorical column follows the category order set above
plot_df = plot_df.sort_values(
    ["tissue", "replicates_correlation"], ascending=[True, False]
).reset_index()

# One evenly spaced angle per row over the full circle, and the matching
# angular width (presumably feeding a polar bar plot — the plotting call is
# outside this block).
theta = np.linspace(0.0, 2 * np.pi, plot_df.shape[0], endpoint=False)
width = (2 * np.pi) / plot_df.shape[0]

fig, ax = plt.subplots(1,
                       1,
                       figsize=(3, 3),
                       dpi=600,
Beispiel #4
0
)

# Package logger plus resource directories resolved through pkg_resources.
# NOTE(review): `logging`, `pkg_resources`, `MOFA` and `DataImport` are
# presumably imported above the visible part of this fragment — confirm.
LOG = logging.getLogger("cancer_proteomics")
# Root and `ppi/` sub-directory of the `data` package
DPATH = pkg_resources.resource_filename("data", "/")
PPIPATH = pkg_resources.resource_filename("data", "ppi/")
# Root of the `tables` package
TPATH = pkg_resources.resource_filename("tables", "/")
# `plots/` directory inside the `cancer_proteomics` package
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")

# ### Import MOFA analysis

# Load a previously saved MOFA model from the tables directory; the helper
# returns three objects, unpacked here as factors, weights and rsquare.
factors, weights, rsquare = MOFA.read_mofa_hdf5(
    f"{TPATH}/MultiOmics_broad.hdf5")

# ### Import manifest

manifest = DataImport.read_protein_perturbation_manifest()

# Remove MDA-MB-468
_is_mda_mb_468 = manifest["External Patient ID"].isin(["MDA-MB-468"])
manifest = manifest[~_is_mda_mb_468]

# Remove low quality samples: first the BT-549 1% FBS rows carrying one
# specific sample date, then two runs dropped by their index label
_is_bt549_1pct_fbs = manifest["Cell Line"] == "BT-549 1% FBS"
_is_flagged_date = manifest["Date on sample"] == "4/7/19 "
manifest = manifest[~(_is_bt549_1pct_fbs & _is_flagged_date)]

_low_quality_runs = [
    "200627_b2-1-t5-1_00wuz_00yid_m03_s_1",
    "200623_b2-1-t4-1_00wuz_00yh3_m01_s_1",
]
manifest = manifest.drop(_low_quality_runs)

# Remove low FBS levels: keep rows whose cell line name lacks "0.5%FBS"
_keep_rows = [
    "0.5%FBS" not in cell_line for cell_line in manifest["Cell Line"]
]
manifest = manifest[_keep_rows]
from adjustText import adjust_text
from crispy.Enrichment import Enrichment
from crispy.CrispyPlot import CrispyPlot
from sklearn.mixture import GaussianMixture
from statsmodels.stats.multitest import multipletests
from cancer_proteomics.notebooks import DataImport, two_vars_correlation

# Package logger plus resource directories resolved through pkg_resources.
# NOTE(review): `logging` and `pkg_resources` are presumably imported above the
# visible part of this fragment — confirm.
LOG = logging.getLogger("cancer_proteomics")
# Root of the `data` package
DPATH = pkg_resources.resource_filename("data", "/")
# Root of the `tables` package
TPATH = pkg_resources.resource_filename("tables", "/")
# `plots/` directory inside the `cancer_proteomics` package
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")

# ### Imports
# Every loader below is a project helper on DataImport; the shape/contents of
# what it returns is not visible here.

# Read samplesheet
ss = DataImport.read_samplesheet()

# Read proteomics (Proteins x Cell lines)
# NOTE(review): map_protein=True presumably maps protein IDs to gene symbols —
# confirm in DataImport.read_protein_matrix.
prot = DataImport.read_protein_matrix(map_protein=True)

# Read Transcriptomics
gexp = DataImport.read_gene_matrix()

# Read copy number
cnv = DataImport.read_copy_number()

# ### Overlaps
# Column labels (samples) and index labels (genes) shared by the proteomics,
# transcriptomics and copy-number matrices
samples = list(set(prot) & set(gexp) & set(cnv))
genes = list(set(prot.index) & set(gexp.index) & set(cnv.index))
LOG.info(f"Genes: {len(genes)}; Samples: {len(samples)}")
from crispy.GIPlot import GIPlot
from crispy.Enrichment import Enrichment
from crispy.CrispyPlot import CrispyPlot
from scipy.stats import spearmanr

from cancer_proteomics.notebooks import DataImport, DimReduction, two_vars_correlation

# Package logger plus resource directories resolved through pkg_resources.
# NOTE(review): `logging` and `pkg_resources` are presumably imported above the
# visible part of this fragment — confirm.
LOG = logging.getLogger("cancer_proteomics")
# Root of the `data` package
DPATH = pkg_resources.resource_filename("data", "/")
# Root of the `tables` package
TPATH = pkg_resources.resource_filename("tables", "/")
# `plots/` directory inside the `cancer_proteomics` package
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")

# ### Imports
# Every loader below is a project helper on DataImport; the shape/contents of
# what it returns is not visible here.

# Read samplesheet
ss = DataImport.read_samplesheet()

# Read proteomics (Proteins x Cell lines)
# NOTE(review): map_protein=True presumably maps protein IDs to gene symbols —
# confirm in DataImport.read_protein_matrix.
prot = DataImport.read_protein_matrix(map_protein=True)

# Read proteomics BROAD (Proteins x Cell lines)
prot_broad = DataImport.read_protein_matrix_broad()

# Read Transcriptomics
gexp = DataImport.read_gene_matrix()

# Overlaps
# Samples: column labels shared by prot and gexp.
# Genes: index labels shared by prot, gexp and prot_broad.
# NOTE(review): prot_broad contributes to `genes` but not to `samples` —
# confirm this asymmetry is intended.
samples = list(set(prot) & set(gexp))
genes = list(set(prot.index) & set(gexp.index) & set(prot_broad.index))
Beispiel #7
0
from crispy.LMModels import LMModels, LModel
from cancer_proteomics.notebooks import DataImport

# Package logger plus resource directories resolved through pkg_resources.
# NOTE(review): `logging`, `pkg_resources`, `PPI` and `skew` are not imported
# in the visible part of this fragment; presumably imported above — confirm.
LOG = logging.getLogger("cancer_proteomics")
# Root and `ppi/` sub-directory of the `data` package
DPATH = pkg_resources.resource_filename("data", "/")
PPIPATH = pkg_resources.resource_filename("data", "ppi/")
# Root of the `tables` package
TPATH = pkg_resources.resource_filename("tables", "/")
# `plots/` directory inside the `cancer_proteomics` package
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")

# ### Imports

# PPI
# STRING-based protein-protein interaction network built from files under
# PPIPATH. NOTE(review): score_thres=900 is presumably the minimum STRING
# combined score kept — confirm in PPI.build_string_ppi.
ppi = PPI(ddir=PPIPATH).build_string_ppi(score_thres=900)

# Read samplesheet
ss = DataImport.read_samplesheet()

# Read proteomics (Proteins x Cell lines)
# NOTE(review): min_measurements=300 presumably drops proteins quantified in
# fewer than 300 cell lines — confirm in DataImport.read_protein_matrix.
prot = DataImport.read_protein_matrix(map_protein=True, min_measurements=300)

# Read Transcriptomics
gexp = DataImport.read_gene_matrix()

# Read CRISPR
crispr = DataImport.read_crispr_matrix()
# Institute annotation restricted to, and ordered like, the CRISPR columns
crispr_institute = DataImport.read_crispr_institute()[crispr.columns]
# Row-wise skew of the CRISPR matrix; nan_policy="omit" is forwarded to `skew`
# (presumably scipy.stats.skew, which then ignores NaNs — confirm the import)
crispr_skew = crispr.apply(skew, axis=1, nan_policy="omit").astype(float)

# Read Drug-response
# NOTE(review): min_measurements=300 presumably filters drugs with too few
# measured cell lines — confirm in DataImport.read_drug_response.
drespo = DataImport.read_drug_response(min_measurements=300)
dtargets = DataImport.read_drug_target()
Beispiel #8
0
import pkg_resources
import matplotlib.pyplot as plt
from crispy.GIPlot import GIPlot
from crispy.MOFA import MOFA, MOFAPlot
from cancer_proteomics.notebooks import DataImport, two_vars_correlation, PALETTE_TTYPE

# Package logger plus resource directories resolved through pkg_resources.
# NOTE(review): `logging` is not imported in the visible part of this
# fragment; presumably imported above — confirm.
LOG = logging.getLogger("cancer_proteomics")
# Root and `ppi/` sub-directory of the `data` package
DPATH = pkg_resources.resource_filename("data", "/")
PPIPATH = pkg_resources.resource_filename("data", "ppi/")
# Root of the `tables` package
TPATH = pkg_resources.resource_filename("tables", "/")
# `plots/` directory inside the `cancer_proteomics` package
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")

# ### Imports
# Every loader below is a project helper on DataImport; the shape/contents of
# what it returns is not visible here.

# Read samplesheet
ss = DataImport.read_samplesheet()

# Read proteomics (Proteins x Cell lines)
# NOTE(review): min_measurements=300 presumably drops proteins quantified in
# fewer than 300 cell lines — confirm in DataImport.read_protein_matrix.
prot = DataImport.read_protein_matrix(map_protein=True, min_measurements=300)

# Read proteomics BROAD (Proteins x Cell lines)
prot_broad = DataImport.read_protein_matrix_broad()

# Read Transcriptomics
gexp = DataImport.read_gene_matrix()

# Read CRISPR
crispr = DataImport.read_crispr_matrix()

# Read Methylation
methy = DataImport.read_methylation_matrix()
Beispiel #9
0
from adjustText import adjust_text
from matplotlib_venn import venn2, venn2_circles
from crispy.CrispyPlot import CrispyPlot
from sklearn.preprocessing import StandardScaler
from cancer_proteomics.notebooks import DataImport, two_vars_correlation, PALETTE_TTYPE

# Package logger plus resource directories resolved through pkg_resources.
# NOTE(review): `logging` and `pkg_resources` are not imported in the visible
# part of this fragment; presumably imported above — confirm.
LOG = logging.getLogger("cancer_proteomics")
# Root and `ppi/` sub-directory of the `data` package
DPATH = pkg_resources.resource_filename("data", "/")
PPIPATH = pkg_resources.resource_filename("data", "ppi/")
# Root of the `tables` package
TPATH = pkg_resources.resource_filename("tables", "/")
# `plots/` directory inside the `cancer_proteomics` package
RPATH = pkg_resources.resource_filename("cancer_proteomics", "plots/")

# ### Imports
# Every loader below is a project helper on DataImport; the shape/contents of
# what it returns is not visible here.

# Read samplesheet
ss = DataImport.read_samplesheet()

# Read proteomics (Proteins x Cell lines)
# NOTE(review): map_protein=True presumably maps protein IDs to gene symbols —
# confirm in DataImport.read_protein_matrix.
prot = DataImport.read_protein_matrix(map_protein=True)
# Raw peptide-level means (per the helper name; exact layout not visible here)
peptide_raw_mean = DataImport.read_peptide_raw_mean()

# Read Transcriptomics
gexp = DataImport.read_gene_matrix()

# Read CRISPR
crispr = DataImport.read_crispr_matrix()

# Read Drug-response
drespo = DataImport.read_drug_response()

# Maximum screened concentration per drug (per the helper name — confirm)
dmaxc = DataImport.read_drug_max_concentration()