Ejemplo n.º 1
0
def _mean_fc_by_model(dataset, s_map):
    """Average a dataset's fold-changes over the columns of each model.

    Args:
        dataset: Object exposing ``counts`` and ``plasmids`` the way the
            ``CRISPRDataSet`` instances used below do.
        s_map: Sample map indexed by the labels found in the fold-change
            columns, with a ``model_id`` column naming each label's model.

    Returns:
        Fold-changes with one column per ``model_id``. Columns whose label
        is absent from ``s_map`` do not take part in any group.
    """
    fc = dataset.counts.norm_rpm().foldchange(dataset.plasmids)

    # ``DataFrame.groupby(..., axis=1)`` is deprecated in recent pandas; the
    # documented replacement is to transpose, group the rows and transpose
    # back, which gives the same per-model column means.
    return fc.T.groupby(s_map["model_id"]).mean().T


def project_score_data(sgrnas, subset=None):
    """Assemble scaled Project Score fold-changes for a set of sgRNAs.

    Reads the Project Score manifest shipped with the ``crispy`` package,
    builds a sample map from it, computes per-model mean fold-changes for the
    ``Yusa_v1`` and ``Yusa_v1.1`` datasets, scales both with the same
    essential / non-essential guide sets and joins them column-wise.

    Args:
        sgrnas: Row labels selected with ``.loc`` from both scaled
            fold-change matrices.
        subset: Optional collection of ``model_id`` values; when given, only
            samples of those models are kept in the sample map.

    Returns:
        The column-wise concatenation of the two scaled matrices restricted
        to ``sgrnas``, with every row containing a missing value dropped.
    """
    ddir = pkg_resources.resource_filename("crispy", "data/")

    score_manifest = pd.read_csv(
        f"{ddir}/crispr_manifests/project_score_manifest.csv.gz")

    # Each manifest row stores comma-separated lists in the "library" and
    # "experiment_identifier" fields; expand them to one row per entry.
    # Iterating the rows directly avoids pairing index labels with positional
    # ``.iloc`` look-ups, which is only correct for a default RangeIndex.
    s_map = pd.concat([
        pd.DataFrame(
            dict(
                model_id=row["model_id"],
                s_ids=row["library"].split(", "),
                s_lib=row["experiment_identifier"].split(", "),
            ))
        for _, row in score_manifest.iterrows()
    ]).set_index("s_lib")

    if subset is not None:
        s_map = s_map[s_map["model_id"].isin(subset)]

    score_v1 = CRISPRDataSet("Yusa_v1")
    score_v1_fc = _mean_fc_by_model(score_v1, s_map)

    score_v11 = CRISPRDataSet("Yusa_v1.1")
    score_v11_fc = _mean_fc_by_model(score_v11, s_map)

    # Both library versions are scaled with guide sets taken from the v1
    # library annotation, exactly as the two original ``scale`` calls did.
    v1_lib = score_v1.lib
    ess = set(v1_lib[v1_lib["Gene"].isin(Utils.get_essential_genes())].index)
    ness = set(
        v1_lib[v1_lib["Gene"].isin(Utils.get_non_essential_genes())].index)

    scaled = [
        ReadCounts(fc).scale(essential=ess, non_essential=ness).loc[sgrnas]
        for fc in (score_v1_fc, score_v11_fc)
    ]

    return pd.concat(scaled, axis=1).dropna()
Ejemplo n.º 2
0
            "EGAN00002143462.sample",
            "EGAN00002143463.sample",
            "EGAN00002143464.sample",
            "EGAN00002143465.sample",
            "EGAN00002143466.sample",
        ],
    )

    # - Imports
    ddir = pkg_resources.resource_filename("data", "organoids/bme2/")
    dreports = pkg_resources.resource_filename("notebooks", "bme/reports/")

    ss = pd.read_excel(f"{ddir}/{organoids['samplesheet']}",
                       index_col=1).query("organoid == 'COLO-027'")

    counts = CRISPRDataSet(organoids, ddir=ddir)

    # -
    samples = list(set(ss.index).intersection(ss.index))
    palette = ss.set_index("name")["palette"]

    # - Fold-changes
    fc = (counts.counts.remove_low_counts(
        counts.plasmids).norm_rpm().foldchange(counts.plasmids))

    fc_gene = fc.groupby(counts.lib.reindex(fc.index)["Gene"]).mean()

    fc_gene_scaled = ReadCounts(fc_gene).scale()

    # -
    fc_gene_scaled.rename(columns=ss["name"]).round(5).to_excel(
Ejemplo n.º 3
0
import pandas as pd
import pkg_resources
import matplotlib.pyplot as plt
from crispy.CRISPRData import CRISPRDataSet
from minlib.Utils import project_score_sample_map, density_interpolate, downsample_sgrnas


# Shared logger plus the packaged data directory (DPATH) and the directory
# reports are written to (RPATH).
LOG = logging.getLogger("Crispy")
DPATH = pkg_resources.resource_filename("crispy", "data/")
RPATH = pkg_resources.resource_filename("notebooks", "minlib/reports/")


# Project Score samples acquired with Kosuke_Yusa v1.1 library
#

# Load the "Yusa_v1.1" dataset and the Project Score sample map, then filter
# the counts against the plasmid samples with remove_low_counts.
# NOTE(review): the exact filtering rule lives in crispy.CRISPRData and is not
# visible here.
ky = CRISPRDataSet("Yusa_v1.1")
ky_smap = project_score_sample_map()
ky_counts = ky.counts.remove_low_counts(ky.plasmids)


# Downsample number of sgRNAs per gene
#

# The list [1, 2, 3, 4, 5, 100] is presumably the set of guides-per-gene
# levels to evaluate, each repeated n_iters=10 times - confirm against
# minlib.Utils.downsample_sgrnas.
ds_scores = downsample_sgrnas(
    ky_counts, ky.lib, ky_smap, [1, 2, 3, 4, 5, 100], n_iters=10
)
# Persist the scores as an Excel sheet without writing the row index.
ds_scores.to_excel(f"{RPATH}/YusaDownsample_AROCs.xlsx", index=False)


# Plot
#
Ejemplo n.º 4
0
import logging
import numpy as np
import pandas as pd
import pkg_resources
from crispy.CRISPRData import CRISPRDataSet, Library
from minlib.Utils import define_sgrnas_sets, estimate_ks

# Shared logger plus the packaged data directory (DPATH) and the directory
# reports are written to (RPATH).
LOG = logging.getLogger("Crispy")
DPATH = pkg_resources.resource_filename("crispy", "data/")
RPATH = pkg_resources.resource_filename("notebooks", "minlib/reports/")

# Project Score KY v1.1
#

# Fold-changes: filter counts against the plasmid samples, normalise with
# norm_rpm, then compute fold-changes relative to the plasmids.
ky = CRISPRDataSet("Yusa_v1.1")
ky_fc = ky.counts.remove_low_counts(ky.plasmids).norm_rpm().foldchange(
    ky.plasmids)
# Build the sgRNA sets (including controls) and pass the "fc" entry of the
# "nontargeting" set to estimate_ks together with the full fold-change matrix.
# NOTE(review): the structure of the returned sets and the statistic computed
# by estimate_ks are defined in minlib.Utils - not visible here.
ky_gsets = define_sgrnas_sets(ky.lib, ky_fc, add_controls=True)
ky_ks = estimate_ks(ky_fc, ky_gsets["nontargeting"]["fc"])

# DepMap 19Q2 Avana
#

# Same fold-change pipeline as above, applied to the "Avana_DepMap19Q2"
# dataset; the dataset name is forwarded explicitly to define_sgrnas_sets.
avana = CRISPRDataSet("Avana_DepMap19Q2")
avana_fc = (avana.counts.remove_low_counts(
    avana.plasmids).norm_rpm().foldchange(avana.plasmids))
avana_gsets = define_sgrnas_sets(avana.lib,
                                 avana_fc,
                                 dataset_name="Avana_DepMap19Q2",
                                 add_controls=True)
Ejemplo n.º 5
0
from natsort import natsorted
from crispy.QCPlot import QCplot
from scipy.stats import spearmanr
from minlib.Utils import replicates_correlation
from crispy.CRISPRData import CRISPRDataSet, Library


# Shared logger plus the packaged data directory (DPATH) and the directory
# reports are written to (RPATH).
LOG = logging.getLogger("Crispy")
DPATH = pkg_resources.resource_filename("crispy", "data/")
RPATH = pkg_resources.resource_filename("notebooks", "minlib/reports/")


# KM12 coverage screens (dataset "KM12_coverage") and their samplesheet
#

ky = CRISPRDataSet("KM12_coverage")
ky_counts = ky.counts.remove_low_counts(ky.plasmids)
ky_ss = pd.read_excel(
    f"{DPATH}/crispr_manifests/KM12_coverage_samplesheet.xlsx", index_col="sample"
)
# Display labels: "<coverage>x (<experiment>)" per condition, with the
# replicate number appended for per-replicate labels.
ky_ss["name"] = [f"{c}x ({e})" for e, c in ky_ss[["experiment", "coverage"]].values]
ky_ss["rep_name"] = [f"{n} (Rep{r})" for n, r in ky_ss[["name", "replicate"]].values]

# KY v1.1 library
#

# Keep only the KosukeYusa guides that survived the low-count filter, then
# compute sgRNA fold-changes and average them per approved gene symbol.
ky_lib = Library.load_library("MasterLib_v1.csv.gz").query("Library == 'KosukeYusa'")
ky_lib = ky_lib[ky_lib.index.isin(ky_counts.index)]
ky_lib_fc = ky_counts.loc[ky_lib.index].norm_rpm().foldchange(ky.plasmids)
ky_lib_gene = ky_lib_fc.groupby(ky_lib["Approved_Symbol"]).mean()
# Average replicate columns sharing the same condition label. Grouping the
# transposed frame replaces ``groupby(..., axis=1)``, which is deprecated in
# recent pandas, and yields the same per-label column means.
ky_lib_gene_avg = ky_lib_gene.T.groupby(ky_ss["name"]).mean().T
Ejemplo n.º 6
0
# Minimal library: KosukeYusa guides from the file named by ml_lib_name
# (defined earlier in the script), excluding guides whose identifier starts
# with "CTRL0".
ml_lib = Library.load_library(ml_lib_name).query("Library == 'KosukeYusa'")
ml_lib = ml_lib.loc[[i for i in ml_lib.index if not i.startswith("CTRL0")]]

# Library configurations keyed by name: the full KosukeYusa library from the
# master library file and the minimal library built above.
libraries = dict(
    All=dict(
        name="All",
        lib=Library.load_library("MasterLib_v1.csv.gz").query(
            "Library == 'KosukeYusa'"),
    ),
    Minimal=dict(name="Minimal", lib=ml_lib),
)

# HT-29 CRISPR-Cas9 + Dabrafenib timecourse (Day 8, 10, 14, 18 and 21)
#

# Counts filtered against the plasmid samples, plus the samplesheet indexed
# by its "sample" column.
dabraf_data = CRISPRDataSet("HT29_Dabraf")
dabraf_count = dabraf_data.counts.remove_low_counts(dabraf_data.plasmids)
dabraf_ss = pd.read_csv(
    f"{DPATH}/crispr_manifests/HT29_Dabraf_samplesheet.csv.gz",
    index_col="sample")

# Export data
#

# Unfiltered copies of the libraries used for annotating the export.
# NOTE(review): kylib is not used in the lines visible here.
minlibcas9 = Library.load_library(ml_lib_name)
kylib = Library.load_library("MasterLib_v1.csv.gz").query(
    "Library == 'KosukeYusa'")

# Start from the unfiltered counts and prepend a boolean column flagging the
# guides that are present in the index of the library loaded from ml_lib_name.
data_export = dabraf_data.counts.copy()
data_export.insert(0, "MinLibCas9_guide",
                   data_export.index.isin(minlibcas9.index))
    ],
    axis=1,
    sort=False,
)
# Where column names are duplicated keep only the last occurrence, then write
# the raw counts as a gzip-compressed CSV (SPATH) and an Excel sheet (RPATH).
rawcounts = rawcounts.loc[:, ~rawcounts.columns.duplicated(keep="last")]
rawcounts.to_csv(f"{SPATH}/MinLibCas9_rawcounts.csv.gz", compression="gzip")
rawcounts.to_excel(f"{RPATH}/MinLibCas9_rawcounts.xlsx")

# Dataset descriptor handed to CRISPRDataSet; read_counts names the CSV
# written above and files are looked up under SPATH via ddir.
mlib_dataset = dict(
    name="MinLibCas9_Screens",
    read_counts="MinLibCas9_rawcounts.csv.gz",
    library="MinLibCas9.csv.gz",
    plasmids=["MHG_library_v1"],
)

mlib_data = CRISPRDataSet(mlib_dataset, ddir=SPATH)

mlib_count = mlib_data.counts.remove_low_counts(mlib_data.plasmids)
# Normalise guide identifiers: identifiers starting with "CTRL" are kept
# as-is, all others are cut at the first "." (the suffix is discarded).
mlib_count.index = [
    i if i.startswith("CTRL") else i.split(".")[0] for i in mlib_count.index
]
# sgRNA-level fold-changes versus the plasmids, then the mean per approved
# gene symbol; mlib is the library table defined earlier in the script.
mlib_fc_sgrna = mlib_count.norm_rpm().foldchange(mlib_data.plasmids)
mlib_fc_gene = mlib_fc_sgrna.groupby(mlib["Approved_Symbol"]).mean()

# Project Score samples acquired with Kosuke_Yusa v1.1 library
#

ky = CRISPRDataSet("Yusa_v1.1")

# Restrict the Project Score sample map to entries whose index label starts
# with "HT29".
ky_smap = project_score_sample_map()
ky_smap = ky_smap[[i.startswith("HT29") for i in ky_smap.index]]
Ejemplo n.º 8
0
import seaborn as sns
import matplotlib.pyplot as plt
from math import sqrt
from crispy.QCPlot import QCplot
from scipy.stats import spearmanr
from sklearn.metrics import mean_squared_error
from crispy.CRISPRData import CRISPRDataSet, Library

# Shared logger plus the packaged data directory (DPATH) and the directory
# reports are written to (RPATH).
LOG = logging.getLogger("Crispy")
DPATH = pkg_resources.resource_filename("crispy", "data/")
RPATH = pkg_resources.resource_filename("notebooks", "minlib/reports/")

# CRISPR-Cas9 screens in 2 colorectal cancer organoids
#

# Load the "Organoids" dataset and filter its counts against the plasmid
# samples with remove_low_counts.
org_data = CRISPRDataSet("Organoids")
org_count = org_data.counts.remove_low_counts(org_data.plasmids)

# Libraries
#

# The two settings select the minimal-library file: with the values below the
# name resolves to "MinimalLib_top2_disconcordant.csv.gz"; with
# REMOVE_DISCORDANT False the "_disconcordant" suffix is omitted.
NGUIDES, REMOVE_DISCORDANT = 2, True
ml_lib_name = (
    f"MinimalLib_top{NGUIDES}{'_disconcordant' if REMOVE_DISCORDANT else ''}.csv.gz"
)
# Keep the KosukeYusa guides of that library, excluding guides whose
# identifier starts with "CTRL0".
ml_lib = Library.load_library(ml_lib_name).query("Library == 'KosukeYusa'")
ml_lib = ml_lib.loc[[i for i in ml_lib.index if not i.startswith("CTRL0")]]

libraries = dict(
    All=dict(
        name="All",