Exemples Python de load_by_patient (methylation.loader.load_by_patient)

Exemple #1

0

Afficher le fichier

Fichier : dmr_comparisons.py Projet : gaberosser/qmul-bioinf

def load_methylation(pids,
                     ref_names=None,
                     norm_method='swan',
                     ref_name_filter=None,
                     units='beta'):
    """
    Load patient methylation data (optionally combined with reference data)
    together with the Illumina MethylationEPIC annotation, both restricted to
    the probes they have in common.

    :param pids: Patient identifiers, forwarded to ``loader.load_by_patient``.
    :param ref_names: If not None, forwarded to ``loader.load_reference`` and
        the resulting loader is combined with the patient loader through
        ``MultipleBatchLoader``.
    :param norm_method: Forwarded unchanged to both loader calls.
    :param ref_name_filter: If not None (and ``ref_names`` is given), passed to
        ``filter_by_sample_name(..., exact=True)`` on the reference loader
        before it is combined with the patient data.
    :param units: When equal to ``'m'`` the data are passed through
        ``process.m_from_beta``; any other value leaves the data as loaded
        (presumably beta values - confirm against the loader).
    :return: Tuple ``(obj, anno)``: the loader object, whose ``data`` attribute
        has been replaced by the reduced table, and the reduced annotation.
    """
    # Patient data first, then the probe annotation.
    patient_obj = loader.load_by_patient(pids, norm_method=norm_method)
    annotation = loader.load_illumina_methylationepic_annotation()

    # Reference data is only loaded and merged in when explicitly requested.
    if ref_names is None:
        combined = patient_obj
    else:
        reference = loader.load_reference(ref_names, norm_method=norm_method)
        if ref_name_filter is not None:
            reference.filter_by_sample_name(ref_name_filter, exact=True)
        combined = loader.loader.MultipleBatchLoader([patient_obj, reference])

    # Drop missing values before any unit conversion.
    values = combined.data.dropna()
    if units == 'm':
        values = process.m_from_beta(values)

    # Keep only the probes present in both the annotation and the data.
    shared = annotation.index.intersection(values.index)
    combined.data = values.loc[shared]

    return combined, annotation.loc[shared]

Exemple #2

0

Afficher le fichier

Fichier : cluster_lines_methylation.py Projet : gaberosser/qmul-bioinf

        'DURA054_IPSC_N3C_P11',
        'DURA054_FB_P5',
        'DURA061_NSC_N4_P2',
        'DURA061_NSC_N6_P4',
        'DURA061_NSC_N1_P3n4',
        'DURA026_NSC_N31D_P5',
        'DURA052_NSC_N4_P3',
        'DURA052_NSC_N5_P2',
        'GIBCONSC_P4',
        # 'DURA052_NH16_2214_P6_14/04/2017',
        # 'DURA026_NH16_270_P8_15/05/2017',
        # 'DURA018_NH15_1877_P6_15/05/2017',
    ]

    patient_obj = loader.load_by_patient(pids,
                                         norm_method=norm_method,
                                         samples=our_samples)

    nazor_ldr = loader.load_reference('GSE31848', norm_method=norm_method)
    ix = nazor_ldr.meta.index.str.contains(r'(ES__WA)|(iPS__HDF)')
    ix = ix & (~nazor_ldr.meta.index.str.contains(r'HDF51IPS7')
               )  # this sample is an outlier, so remove it now
    nazor_ldr.filter_samples(ix)

    # Zhou et al.: lots of samples here, but we'll only keep 2 x ESC lines
    zhou_ldr = loader.load_reference('GSE92462_450K', norm_method=norm_method)
    ix = zhou_ldr.meta.index.str.contains(r'^H[19]ES')
    zhou_ldr.filter_samples(ix)

    hip_epic_ldr = loader.hipsci(norm_method=norm_method,
                                 n_sample=n_hipsci,

Exemple #3

0

Afficher le fichier

    return {'axs': axs, 'fig': fig}


if __name__ == "__main__":
    pids = consts.PIDS
    norm_method = 'swan'
    dmr_params = consts.DMR_PARAMS
    dmr_params['n_jobs'] = mp.cpu_count()

    outdir = output.unique_output_dir()
    DMR_LOAD_DIR = os.path.join(INTERMEDIATE_DIR, 'dmr')

    # load our data
    our_obj = loader.load_by_patient(pids,
                                     norm_method=norm_method,
                                     samples=consts.S1_METHYL_SAMPLES)
    anno = loader.load_illumina_methylationepic_annotation()
    our_obj.meta.insert(
        0, 'patient_id',
        our_obj.meta.index.str.replace(r'(GBM|DURA)(?P<pid>[0-9]{3}).*',
                                       '\g<pid>'))

    # load validation data
    val_obj = loader.load_reference('GSE92462_450k', norm_method=norm_method)
    # filter
    val_obj.filter_samples(val_obj.meta.type.isin(['GBM (GSC)', 'NSC']))

    # TODO: upload to the classifier and run (toggle this so it's only run once)

    # combine and reduce probes

Exemple #4

0

Afficher le fichier

Fichier : analyse_dmr_direction_ffpe_vs_syngeneic.py Projet : gaberosser/qmul-bioinf

if __name__ == "__main__":
    pids = consts.PIDS
    norm_method = 'swan'
    alpha = 0.05
    pk_alpha = -np.log10(alpha)

    dmr_params = consts.DMR_PARAMS
    dmr_params['n_jobs'] = mp.cpu_count()

    outdir = output.unique_output_dir()
    DMR_LOAD_DIR = os.path.join(INTERMEDIATE_DIR, 'dmr')

    # load our data
    cc_obj = loader.load_by_patient(pids,
                                    norm_method=norm_method,
                                    samples=consts.S1_METHYL_SAMPLES)
    ffpe_obj = loader.load_by_patient(pids,
                                      norm_method=norm_method,
                                      type='ffpe')

    anno = loader.load_illumina_methylationepic_annotation()
    # add patient ID column to metadata
    cc_obj.meta.insert(
        0, 'patient_id',
        cc_obj.meta.index.str.replace(r'(GBM|DURA)(?P<pid>[0-9]{3}).*',
                                      '\g<pid>'))
    ffpe_obj.meta.insert(
        0, 'patient_id',
        [hgic_consts.NH_ID_TO_PATIENT_ID_MAP[t] for t in ffpe_obj.meta.index])
    ffpe_obj.meta.insert(1, 'type', 'ffpe')

Exemple #5

0

Afficher le fichier

Fichier : methylation_locus_plot_for_de_dmr_genes.py Projet : gaberosser/qmul-bioinf

from utils import output, log, setops
from scripts.hgic_final import consts, two_strategies_grouped_dispersion as tsgd
from methylation import loader as methylation_loader, dmr, process
from rnaseq import loader as rnaseq_loader
from settings import INTERMEDIATE_DIR

from plotting import genomics

logger = log.get_console_logger()

if __name__ == '__main__':
    outdir = output.unique_output_dir()

    # load methylation and DMR data
    meth_obj = methylation_loader.load_by_patient(consts.PIDS,
                                                  include_control=False)
    meth_obj.filter_by_sample_name(consts.S1_METHYL_SAMPLES_GIC +
                                   consts.S1_METHYL_SAMPLES_INSC)
    meth_obj.meta.insert(
        0, 'patient_id',
        meth_obj.meta.index.str.replace(r'(GBM|DURA)(?P<pid>[0-9]{3}).*',
                                        '\g<pid>'))

    mdat = process.m_from_beta(meth_obj.data)

    norm_method_s1 = 'swan'
    dmr_params = consts.DMR_PARAMS
    de_params = consts.DE_PARAMS

    DMR_LOAD_DIR = os.path.join(INTERMEDIATE_DIR, 'dmr')
    DE_LOAD_DIR = os.path.join(INTERMEDIATE_DIR, 'de')

Exemple #6

0

Afficher le fichier


if __name__ == "__main__":
    """
    Here, we simply load the methylation data and export it in an efficient manner (restricting the floating point
    bit depth to save space).

    We also export the annotation and metadata separately.
    """
    norm_method = 'swan'
    # the float format is used when exporting to Excel - it reduces the file size by restricting the precision
    float_format = '%.2f'
    outdir = output.unique_output_dir()
    anno = loader.load_illumina_methylationepic_annotation()
    obj_cc = loader.load_by_patient(consts.PIDS,
                                    type='cell_culture',
                                    norm_method=norm_method,
                                    reduce_to_common_probes=False)
    obj_ff = loader.load_by_patient(consts.PIDS,
                                    type='ffpe',
                                    norm_method=norm_method,
                                    reduce_to_common_probes=False)
    # add useful patient ID column to metadata
    obj_ff.meta.insert(0, 'patient_id',
                       [NH_ID_TO_PATIENT_ID_MAP[k] for k in obj_ff.meta.index])

    # export methylation data
    obj_cc.data.to_excel(os.path.join(outdir,
                                      "methylation_beta_cell_culture.xlsx"),
                         float_format=float_format)
    obj_ff.data.to_excel(os.path.join(outdir, "methylation_beta_ffpe.xlsx"),
                         float_format=float_format)

Exemple #7

0

Afficher le fichier

        'core_min_sample_overlap': 3,  # 3 / 4 samples must match
        'd_max': 400,
        'n_min': 6,
        'delta_m_min': 1.4,
        'fdr': 0.01,
        'dmr_test_method': 'mwu',  # 'mwu', 'mwu_permute'
        'test_kwargs': {},
        'n_jobs': 4,
    }
    norm_method = 'swan'

    intersecter = lambda x, y: set(x).intersection(y)
    unioner = lambda x, y: set(x).union(y)

    # Load DNA Methylation
    me_obj = loader.load_by_patient(pids, norm_method=norm_method)
    me_meta = me_obj.meta
    # me_data, me_meta = methylation_array.load_by_patient(pids)
    # me_data.dropna(inplace=True)
    # me_data = process.m_from_beta(me_data)

    me_data = process.m_from_beta(me_obj.data)

    anno = loader.load_illumina_methylationepic_annotation()
    # anno = methylation_array.load_illumina_methylationepic_annotation()

    # reduce anno and data down to common probes
    common_probes = anno.index.intersection(me_data.index)
    anno = anno.loc[common_probes]
    me_data = me_data.loc[common_probes]

Exemple #8

0

Afficher le fichier

        'n_min': 6,
        'delta_m_min': 1.4,
        'alpha': 0.01,
        'dmr_test_method': 'mwu',  # 'mwu', 'mwu_permute'
        'test_kwargs': {},
    }
    norm_method_s1 = 'swan'

    ############
    # 1: FFPE  #
    ############

    # in this case, we want the median beta value over all probes that are associated with a given gene
    # we'll exclude those associated with gene body only

    ffpe_obj = loader.load_by_patient(pids, type='ffpe', norm_method=norm_method_s1)
    anno = loader.load_illumina_methylationepic_annotation(split_genes=False)

    # reduce anno to (probe ID, gene, relation)
    probe_tups = set()
    for i, row in anno.iterrows():
        if pd.isnull(row.UCSC_RefGene_Name):
            continue
        genes = row.UCSC_RefGene_Name.split(';')
        rels = row.UCSC_RefGene_Group.split(';')
        for g, r in zip(genes, rels):
            probe_tups.add(
                (i, g, r)
            )

    probe_tups = list(probe_tups)

Exemple #9

0

Afficher le fichier

    n_probe_to_show = 2000
    clustering_metric = 'euclidean'

    outdir = output.unique_output_dir()

    norm_method = 'swan'
    pdx_bulk_samples = ['SM18_108A_GBM019Luc_PDX1', 'SM18_119A_GBM019Luc_PDX2']
    gic_late_samples = [
        'GBM019Luc_P12',
        'GBM019Luc_P3_PDX1',
        'GBM019Luc_P2_PDX2',
    ]

    # load all relevant data
    our_gic_obj = loader.load_by_patient(consts.PIDS,
                                         include_control=False,
                                         samples=consts.S1_METHYL_SAMPLES_GIC,
                                         norm_method=norm_method)
    our_ffpe_obj = loader.load_by_patient(consts.PIDS,
                                          type='ffpe',
                                          include_control=False,
                                          norm_method=norm_method)
    pdx_bulk = loader.load_reference('2018-12-14',
                                     norm_method=norm_method,
                                     samples=pdx_bulk_samples)
    gic_late = loader.load_reference('2018-12-06',
                                     norm_method=norm_method,
                                     samples=gic_late_samples)

    # add patient ID to samples
    our_gic_obj.meta.insert(
        0, 'patient_id',