Beispiel #1
0
# Script-wide logger, obtained under the name "Crispy".
LOG = logging.getLogger("Crispy")

# Filesystem locations resolved through pkg_resources.
DPATH = pkg_resources.resource_filename("crispy", "data/")
RPATH = pkg_resources.resource_filename("reports", "eg/")
# The __main__ section that follows reads TPATH (to locate mart_export.txt),
# so it must be bound before that section runs; previously the only binding
# came later in the file, which made the read fail with a NameError.
# The value is the same one the later TPATH assignment in this file uses.
TPATH = pkg_resources.resource_filename("tables", "/")

if __name__ == "__main__":
    # ---- Data-set handles -------------------------------------------------
    # One handle per data-set class. Note that prot_obj, gexp_obj and
    # crispr_obj are re-created (and prot / gexp recomputed) further down.
    wes_obj = WES()

    mobem_obj = Mobem()
    cn_obj = CopyNumber()

    prot_obj = Proteomics()
    gexp_obj = GeneExpression()

    crispr_obj = CRISPR()
    drug_obj = DrugResponse()

    # ---- Samples ----------------------------------------------------------
    # Intersection over the listed sample sets; only the proteomics data
    # contributes a set here, so the result is a copy of that set.
    sample_sets = [set(prot_obj.get_data())]
    samples = set.intersection(*sample_sets)
    LOG.info(f"Samples: {len(samples)}")

    # ---- Restrict data-sets to the selected samples -----------------------
    prot = prot_obj.filter(subset=samples)
    LOG.info(f"Proteomics: {prot.shape}")

    gexp = gexp_obj.filter(subset=samples)

    # ---- Gene information -------------------------------------------------
    # Tab-separated export with per-gene start/end coordinates.
    ginfo = pd.read_csv(f"{TPATH}/mart_export.txt", sep="\t")

    # Midpoint of each record = row-wise mean of its end and start columns.
    gene_bounds = ginfo[["Gene end (bp)", "Gene start (bp)"]]
    ginfo["mean_pos"] = gene_bounds.mean(axis=1)

    # Drop records whose chromosome/scaffold is not listed in Utils.CHR_ORDER.
    on_listed_chr = ginfo["Chromosome/scaffold name"].isin(Utils.CHR_ORDER)
    ginfo = ginfo[on_listed_chr]

    # Per gene name: first chromosome seen and the average midpoint,
    # placed side by side as columns "chr" and "chr_pos".
    by_gene = ginfo.groupby("Gene name")
    gene_chr = by_gene["Chromosome/scaffold name"].first().rename("chr")
    gene_pos = by_gene["mean_pos"].mean().rename("chr_pos")
    ginfo_pos = pd.concat([gene_chr, gene_pos], axis=1)

    # ---- Y matrices -------------------------------------------------------
    # Fresh handles and unfiltered matrices; these rebind gexp / prot from
    # the sample-restricted versions computed above.
    gexp_obj = GeneExpression()
    gexp = gexp_obj.filter()
    LOG.info(f"Gexp: {gexp.shape}")

    prot_obj = Proteomics()
    prot = prot_obj.filter()
    # Keep only entries whose count along axis 1 exceeds 300
    # (presumably rows with > 300 non-missing values -- confirm prot is a
    # DataFrame).
    has_enough_values = prot.count(1) > 300
    prot = prot[has_enough_values]
    LOG.info(f"Prot: {prot.shape}")

    # ---- X matrices -------------------------------------------------------
    crispr_obj = CRISPR()
    crispr = crispr_obj.filter(dtype="merged")
    LOG.info(f"CRISPR: {crispr.shape}")

    drespo_obj = DrugResponse()

# Logger for this script, obtained under the name "Crispy".
LOG = logging.getLogger("Crispy")
# Resource paths resolved through pkg_resources: "data/" inside the crispy
# package, "eg/" inside the reports package, and the root of the tables package.
DPATH = pkg_resources.resource_filename("crispy", "data/")
RPATH = pkg_resources.resource_filename("reports", "eg/")
TPATH = pkg_resources.resource_filename("tables", "/")


if __name__ == "__main__":
    # Data-sets
    #
    # Proteomics is loaded first; list(prot) is then passed as `subset` to the
    # other filters (presumably the column labels, i.e. samples -- confirm that
    # prot is a DataFrame).
    prot_obj = Proteomics()
    prot = prot_obj.filter()
    LOG.info(f"Proteomics: {prot.shape}")

    gexp_obj = GeneExpression()
    gexp = gexp_obj.filter(subset=list(prot))
    LOG.info(f"Transcriptomics: {gexp.shape}")

    crispr_obj = CRISPR()
    crispr = crispr_obj.filter(subset=list(prot))
    LOG.info(f"CRISPR: {crispr.shape}")

    drespo_obj = DrugResponse()

    drespo = drespo_obj.filter()
    # Flatten every index entry into a single ";"-joined string; each entry is
    # iterated and its parts are converted with str() (presumably tuples from a
    # multi-level index -- confirm).
    drespo.index = [";".join(map(str, i)) for i in drespo.index]

    # First "max_screening_conc" value within each
    # (drug_id, drug_name, dataset) group of the raw drug-response table.
    dmax = drespo_obj.drugresponse.groupby(["drug_id", "drug_name", "dataset"])[
        "max_screening_conc"
    ].first()