Ejemplo n.º 1
0
)

# Logger shared by this script, registered under the name "Crispy".
LOG = logging.getLogger("Crispy")
# Filesystem path of the "data/" resource inside the "crispy" package.
DPATH = pkg_resources.resource_filename("crispy", "data/")
# Filesystem path of the "eg/" resource inside the "reports" package.
RPATH = pkg_resources.resource_filename("reports", "eg/")

if __name__ == "__main__":
    # Script entry point: instantiates the data-set importers, selects the
    # samples to analyse, derives per-gene chromosome positions and builds
    # the Y (gene expression, proteomics) and X (CRISPR, drug response)
    # matrices.

    # Data-sets
    #
    # NOTE(review): wes_obj, mobem_obj and cn_obj are never read in the
    # visible part of this script.

    wes_obj = WES()

    mobem_obj = Mobem()
    cn_obj = CopyNumber()

    prot_obj = Proteomics()
    gexp_obj = GeneExpression()

    crispr_obj = CRISPR()
    drug_obj = DrugResponse()

    # Samples
    #
    # set.intersection() with a single argument returns a copy of that set,
    # so `samples` is simply the set of items yielded by iterating
    # prot_obj.get_data() (presumably the sample/column labels - TODO confirm).
    samples = set.intersection(set(prot_obj.get_data()))
    LOG.info(f"Samples: {len(samples)}")

    # Filter data-sets
    #
    prot = prot_obj.filter(subset=samples)
    LOG.info(f"Proteomics: {prot.shape}")
    # NOTE(review): `ginfo` is not assigned anywhere in the visible code
    # before this line; unless it is defined in a part of the script that is
    # not shown here, this statement raises NameError.
    # Keep only the entries whose "Chromosome/scaffold name" is listed in
    # Utils.CHR_ORDER.
    ginfo = ginfo[ginfo["Chromosome/scaffold name"].isin(Utils.CHR_ORDER)]

    # Per "Gene name": the first chromosome value (renamed "chr") and the
    # mean of "mean_pos" (renamed "chr_pos"), concatenated side by side
    # (axis=1).
    ginfo_pos = pd.concat([
        ginfo.groupby("Gene name")["Chromosome/scaffold name"].first().rename(
            "chr"),
        ginfo.groupby("Gene name")["mean_pos"].mean().rename("chr_pos"),
    ],
                          axis=1)

    # Y matrices
    #
    # NOTE(review): gexp_obj and prot_obj were already created above and are
    # re-instantiated here.
    gexp_obj = GeneExpression()
    gexp = gexp_obj.filter()
    LOG.info(f"Gexp: {gexp.shape}")

    prot_obj = Proteomics()
    # This overwrites the sample-filtered `prot` computed earlier with an
    # unfiltered call to filter().
    prot = prot_obj.filter()
    # Keep the entries whose count(1) exceeds 300 (presumably rows with more
    # than 300 non-missing values, assuming a pandas DataFrame - TODO confirm).
    prot = prot[prot.count(1) > 300]
    LOG.info(f"Prot: {prot.shape}")

    # X matrices
    #
    # NOTE(review): crispr_obj was already created above and is
    # re-instantiated here.
    crispr_obj = CRISPR()
    crispr = crispr_obj.filter(dtype="merged")
    LOG.info(f"CRISPR: {crispr.shape}")

    drespo_obj = DrugResponse()
    drespo = drespo_obj.filter()
    # Same count(1) > 300 threshold as applied to `prot` above.
    drespo = drespo[drespo.count(1) > 300]
    # Drop the entries whose index label contains "+". Whether `in` acts as a
    # substring or an element-membership test depends on the type of the
    # index labels - TODO confirm.
    drespo = drespo[["+" not in i for i in drespo.index]]
    # Flatten each index label into one string by joining its elements
    # (converted with str) with ";".
    drespo.index = [";".join(map(str, i)) for i in drespo.index]
Ejemplo n.º 3
0
from crispy.LMModels import LMModels, LModel
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from statsmodels.stats.multitest import multipletests
from crispy.DataImporter import Proteomics, CRISPR, GeneExpression, DrugResponse, Sample


# Logger shared by this script, registered under the name "Crispy".
LOG = logging.getLogger("Crispy")
# Filesystem path of the "eg/" resource inside the "reports" package.
RPATH = pkg_resources.resource_filename("reports", "eg/")


if __name__ == "__main__":
    # One importer instance per data type handled by this script.
    gexp_obj = GeneExpression()
    prot_obj = Proteomics()
    crispr_obj = CRISPR()
    drespo_obj = DrugResponse()

    # Labels present in both the proteomics data and the merged CRISPR data
    # (whatever iterating the get_data() results yields).
    samples_crispr = set(prot_obj.get_data()) & set(
        crispr_obj.get_data(dtype="merged")
    )
    LOG.info(f"CRISPR samples: {len(samples_crispr)}")

    # Labels present in both the proteomics data and the drug-response data.
    samples_drug = set(prot_obj.get_data()) & set(drespo_obj.get_data())
    LOG.info(f"Drug samples: {len(samples_drug)}")