Ejemplo n.º 1
0
def main():
    """Run the MUTATION_EXTENDED steps that turn VCF inputs into exported MAF data.

    The function takes no arguments: it reads the module-level
    ``meta_config``, ``study_config`` and ``logger`` objects, rebinds the
    module-level ``meta_config`` with the values returned by
    ``helper.relocate_inputs`` and ``support_functions.export2maf``, and
    publishes the computed ``verb`` flag as a module-level name.

    Steps, in order: relocate inputs, verify dual columns, export to MAF,
    generate metadata, clean MAF headers, concatenate files.
    """
    global meta_config
    global study_config
    global janus_path
    global logger
    global verb

    import logging

    from support import helper
    # 'meta' is used below for metadata generation.  It was previously not
    # imported here, so the call only worked if the executing module already
    # had a 'meta' name in its globals; import it explicitly instead.
    from generate import meta
    from generate.analysis_pipelines.MUTATION_EXTENDED import support_functions

    verb = logger.isEnabledFor(
        logging.INFO)  # TODO replace the 'verb' switch with logger

    logger.info('Transferring VCF files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, verb)

    logger.info(
        'Ensuring both columns exist, otherwise adding UNMATCHED column ...')
    support_functions.verify_dual_columns(meta_config, verb)

    logger.info('Exporting vcf2maf...')
    logger.info('And deleting .vcf s...')
    meta_config = support_functions.export2maf(meta_config, study_config, verb)

    # Generate the meta data files for mutation extended data
    logger.info('Generating MUTATION_EXTENDED Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    logger.info('Cleaning MAF Files ...')
    support_functions.clean_head(meta_config, verb)

    logger.info('Concating MAF Files to export folder  ...')
    helper.concat_files(meta_config, study_config, verb)
Ejemplo n.º 2
0
def main():
    """Process SEG inputs and write the segment data and metadata.

    Works entirely on the module-level ``meta_config``, ``study_config`` and
    ``logger`` objects; ``meta_config`` is rebound to the value returned by
    ``helper.relocate_inputs``.  Each step is bracketed by an INFO message
    before and a 'Done.' message after.
    """
    global meta_config, study_config, janus_path, logger

    import logging

    from support import helper
    from generate import meta
    from generate.analysis_pipelines.COPY_NUMBER_ALTERATION.support_functions import fix_chrom, fix_seg_id

    # TODO replace the 'verb' switch with logger
    verbose = logger.isEnabledFor(logging.INFO)
    info = logger.info

    # Step 1: move the inputs; the helper hands back an updated config
    info('Gathering and decompressing SEG files into temporary folder, and updating config')
    meta_config = helper.relocate_inputs(meta_config, study_config, verbose)
    info('Done.')

    # Step 2: chromosome numbering
    info('Fixing Chromosome numbering ...')
    fix_chrom(meta_config, study_config, logger)
    info('Done.')

    # Step 3: SEG identifiers
    info('Fixing .SEG IDs')
    fix_seg_id(meta_config, study_config, logger)
    info('Done.')

    # Step 4: concatenate into the export folder
    info('Concatenating SEG Files to export folder')
    helper.concat_files(meta_config, study_config, verbose)
    info('Done.')

    # Step 5: metadata
    info('Generating segments Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)
    info('Done.')
Ejemplo n.º 3
0
 def test_relocate_inputs(self):
     """Check relocate_inputs output files, input folder and FILE_NAME column.

     Five inputs (plain text, .tar.gz, .tgz, .tar and .gz) are passed through
     relocate_inputs; the test then verifies the checksums of the expected
     output files, that the returned config's 'input_folder' points at the
     temporary output location, and the set of FILE_NAME values.
     """
     case = 'relocate_inputs'
     source_dir = os.path.join(self.dataDir, case)
     target_dir = os.path.join(self.tmp.name, case)
     os.mkdir(target_dir)

     input_names = ['blue.txt', 'green.tar.gz', 'yellow.tgz', 'purple.tar',
                    'red.txt.gz']
     frame = pd.DataFrame({'FILE_NAME': input_names})
     input_config = mock_legacy_config({'input_folder': source_dir}, frame)
     output_config = mock_legacy_config({'output_folder': target_dir})
     relocated = relocate_inputs(input_config, output_config, True)

     # all output files should be identical
     expected_md5 = 'edc715389af2498a623134608ba0a55b'
     expected_paths = ['blue.txt', 'green/green.txt', 'yellow/yellow.txt',
                       'purple/purple.txt', 'red.txt']
     expected_checksums = dict.fromkeys(expected_paths, expected_md5)
     relocated_dir = os.path.join(target_dir, 'temp', 'temp_mock')
     self.verify_checksums(expected_checksums, relocated_dir)

     self.assertEqual(relocated.config_map['input_folder'], relocated_dir,
                      'input folder updated')

     # of the FILE_NAME entries, only the .gz one is expected to change
     expected_names = {'blue.txt', 'green.tar.gz', 'yellow.tgz',
                       'purple.tar', 'red.txt'}
     actual_names = set(relocated.data_frame['FILE_NAME'].values)
     self.assertTrue(expected_names == actual_names, 'filenames updated')
Ejemplo n.º 4
0
def main():
    """Run the CAP_mutation steps on MAF inputs.

    Uses the module-level ``meta_config``, ``study_config`` and ``logger``;
    ``meta_config`` is rebound to the value returned by
    ``helper.relocate_inputs``.  Filter thresholds and the OncoKB token are
    read from ``meta_config.config_map``.

    Raises:
        FileNotFoundError: re-raised, after logging an error, if the TGL
            filter step cannot read a file.
    """
    global meta_config, study_config, janus_path, logger

    import logging
    import os
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.MUTATION_EXTENDED import support_functions

    # config_map keys passed to maf_filter, in positional order
    filter_keys = (
        'Mutation_Type',
        'Filter_Exception',
        'Minimum_Tumour_Depth',
        'Minimum_Tumour_AF',
        'Maximum_gnomAD_AF',
        'Maximum_Local_Freq',
    )

    # TODO replace the 'verb' switch with logger
    verbose = logger.isEnabledFor(logging.INFO)

    logger.info('Started processing data for CAP_mutation pipeline')

    # Relocate inputs into the temp folder; keep the returned config
    logger.info('Decompressing MAF files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, verbose)

    # Header cleanup
    logger.info('Cleaning MAF Files ...')
    support_functions.clean_head(meta_config, verbose)

    # Concatenation
    logger.info('Concatenating MAF Files to export folder  ...')
    helper.concat_files(meta_config, study_config, verbose)

    # Metadata for mutation extended data
    logger.info('Generating MUTATION_EXTENDED Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    # MAF filtering, parameterised from the config map
    logger.info('Filtering MAF Files ...')
    filter_args = [meta_config.config_map[key] for key in filter_keys]
    support_functions.maf_filter(meta_config, study_config, *filter_args)

    # OncoKB annotation
    logger.info('Annotating MAF files ...')
    token = meta_config.config_map['oncokb_api_token']
    support_functions.oncokb_annotation(meta_config, study_config, token,
                                        verbose)

    # TGL pipe filtering
    logger.info('Filtering TGL pipe ...')
    try:
        support_functions.TGL_filter(meta_config, study_config)
    except FileNotFoundError as err:  # eg. failure to read vep_keep_columns.txt
        logger.error("Cannot read file: {0}".format(err))
        raise

    logger.info('Finished processing data for CAP_mutation pipeline')
Ejemplo n.º 5
0
def main():
    """Run the MRNA_EXPRESSION steps and, if configured, the z-score steps.

    Uses the module-level ``meta_config``, ``study_config`` and ``logger``;
    ``meta_config`` is rebound to the value returned by
    ``helper.relocate_inputs``.  When ``meta_config.config_map`` has a truthy
    'zscores' entry, z-score data is generated from the expression data file
    in the study output folder, and ``meta_config.datahandler`` is set to
    'Z-SCORE' before metadata is generated a second time.
    """
    global meta_config
    global study_config
    global janus_path
    global logger

    import logging
    # 'os' is needed for os.path.join in the z-score branch.  It was
    # previously not imported here, so that branch depended on the executing
    # module already having 'os' in its globals.
    import os
    from constants.constants import config2name_map
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.MRNA_EXPRESSION.support_functions import alpha_sort, generate_expression_matrix, generate_expression_zscore

    verb = logger.isEnabledFor(
        logging.INFO)  # TODO replace the 'verb' switch with logger

    logger.info(
        'Gathering and decompressing MRNA_EXPRESSION files into temporary folder'
    )
    meta_config = helper.relocate_inputs(meta_config, study_config, verb)

    logger.info('Alpha sorting each file ...')
    alpha_sort(meta_config, verb)

    logger.info('Generating expression matrix ...')
    generate_expression_matrix(meta_config, study_config, verb)

    logger.info('Generating expression Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    # A missing or falsy 'zscores' entry skips the z-score steps
    if meta_config.config_map.get('zscores'):
        logger.info('Generating expression Z-Score Data ...')
        output_folder = study_config.config_map['output_folder']
        data_name = config2name_map[meta_config.alterationtype + ":" +
                                    meta_config.datahandler]
        generate_expression_zscore(
            meta_config,
            os.path.join(output_folder, 'data_{}.txt'.format(data_name)),
            output_folder, False, False, verb)
        logger.info('Generating expression Z-Score Meta ...')
        # Tweak the config to write zscore metadata; TODO do this more transparently
        meta_config.datahandler = 'Z-SCORE'
        meta.generate_meta_type(meta_config, study_config, logger)
Ejemplo n.º 6
0
def main():
    """Run the Mutect steps: VCF inputs, PASS filtering, MAF export.

    Uses the module-level ``meta_config``, ``study_config`` and ``logger``.
    ``meta_config`` is rebound twice: to the value returned by
    ``helper.relocate_inputs`` and then to the value returned by
    ``export2maf``.
    """
    global meta_config, study_config, janus_path, logger

    import logging
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.MUTATION_EXTENDED import support_functions as mutation_steps

    # TODO replace the 'verb' switch with logger
    verbose = logger.isEnabledFor(logging.INFO)

    # Relocate the VCF inputs; the helper returns an updated config
    logger.info('Decompressing VCF files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, verbose)

    # Column check
    logger.info(
        'Ensuring both columns exist, otherwise adding UNMATCHED column ...')
    mutation_steps.verify_dual_columns(meta_config, verbose)

    # Reject filtering
    logger.info('Filtering for only PASS ...')
    mutation_steps.filter_vcf_rejects(meta_config, verbose)

    # VCF -> MAF export; again keep the returned config
    logger.info('Exporting vcf2maf, and deleting .VCFs')
    meta_config = mutation_steps.export2maf(meta_config, study_config,
                                            verbose)

    # Metadata for mutation extended data
    logger.info('Generating MUTATION_EXTENDED Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    # Header cleanup
    logger.info('Cleaning MAF Files ...')
    mutation_steps.clean_head(meta_config, verbose)

    # Concatenation
    logger.info('Concating MAF Files to export folder  ...')
    helper.concat_files(meta_config, study_config, verbose)

    logger.info('Finished processing data for Mutect pipeline')
Ejemplo n.º 7
0
def main():
    """Run the CAP_expression steps: expression matrix, preprocessing, z-scores.

    Uses the module-level ``meta_config``, ``study_config`` and ``logger``;
    ``meta_config`` is rebound to the value returned by
    ``helper.relocate_inputs``.  The 'genelist' and 'enscon' paths come from
    ``meta_config.config_map`` when set to a truthy value, otherwise from
    files in the data directory next to ``__file__``.

    When ``meta_config.config_map`` has a truthy 'zscores' entry, z-score and
    percentile data are generated for the study, comparison and TCGA data
    files, and ``meta_config.datahandler`` is set to 'Z-SCORE' before
    metadata is generated a second time.
    """
    global meta_config
    global study_config
    global janus_path
    global logger

    # imports are moved into the main (and only) method to work with the legacy component class
    import logging
    import os
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.MRNA_EXPRESSION.support_functions import alpha_sort, generate_expression_matrix, generate_expression_percentile, generate_expression_zscore, preProcRNA
    from constants.constants import config2name_map
    from utilities.constants import DATA_DIRNAME

    verb = logger.isEnabledFor(logging.INFO)  # TODO replace the 'verb' switch with logger

    def bundled_data(filename):
        """Return the path of *filename* in the data directory beside __file__."""
        return os.path.join(os.path.dirname(__file__), DATA_DIRNAME, filename)

    def data_file(handler, suffix=''):
        """Return 'data_<name><suffix>.txt' for the current alteration type and *handler*."""
        key = meta_config.alterationtype + ":" + handler
        return 'data_{}{}.txt'.format(config2name_map[key], suffix)

    # A configured (truthy) path wins; the bundled default is only computed
    # when it is actually needed.
    genelist = meta_config.config_map.get('genelist') or bundled_data('targeted_genelist.txt')
    enscon = meta_config.config_map.get('enscon') or bundled_data('ensemble_conversion.txt')

    logger.info('Started processing data for CAP_expression pipeline')

    logger.info('Decompressing MRNA_EXPRESSION files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, verb)

    logger.info('Alpha sorting each file ...')
    alpha_sort(meta_config, verb)

    logger.info('Generating expression matrix ...')
    generate_expression_matrix(meta_config, study_config, verb)

    # preProcRNA - generate processed continuous data using the generated expression matrix - one for study and one for study comparison and one for TCGA data
    # The leading '/' on the file argument is kept exactly as the callee has always received it.
    preProcRNA(meta_config, study_config, '/' + data_file(meta_config.datahandler, '_gepcomp'), enscon, genelist, True, False)
    preProcRNA(meta_config, study_config, '/' + data_file(meta_config.datahandler), enscon, genelist, False, True)

    if meta_config.config_map.get('zscores'):
        output_folder = study_config.config_map['output_folder']

        # Generate the z-scores for mRNA expression data
        logger.info('Generating expression Z-Score Data ...')
        generate_expression_zscore(
            meta_config,
            os.path.join(output_folder, data_file(meta_config.datahandler)),
            output_folder, False, False, verb)

        # Generate the mRNA expression percentile data
        logger.info('Generating expression Percentile Data ...')
        generate_expression_percentile(
            meta_config,
            os.path.join(output_folder, data_file('Z-SCORE')),
            output_folder, False, False, verb)

        # Generate the z-scores for mRNA expression comparison data
        logger.info('Generating expression Z-Score comparison Data ...')
        generate_expression_zscore(
            meta_config,
            os.path.join(output_folder, data_file(meta_config.datahandler, '_gepcomp')),
            output_folder, True, False, verb)

        # Generate the mRNA expression comparison percentile data
        # NOTE(review): this passes the same file name as the non-comparison
        # percentile step above; only the boolean flag differs - confirm intended.
        logger.info('Generating expression Percentile comparison Data ...')
        generate_expression_percentile(
            meta_config,
            os.path.join(output_folder, data_file('Z-SCORE')),
            output_folder, True, False, verb)

        # Generate the z-scores for mRNA expression TCGA data
        # (this message previously went through helper.working_on; it now
        # uses the logger like every other progress message in this function)
        logger.info('Generating expression TCGA Z-Score Data ...')
        generate_expression_zscore(
            meta_config,
            os.path.join(output_folder, data_file(meta_config.datahandler, '_tcga')),
            output_folder, False, True, verb)

        # Generate the TCGA mRNA expression percentile data
        logger.info('Generating expression TCGA Percentile Data ...')
        generate_expression_percentile(
            meta_config,
            os.path.join(output_folder, 'supplementary_data', data_file('Z-SCORE', '_tcga')),
            output_folder, False, True, verb)

    # Generate meta data within the handler and not in generator.py
    # Generate metadata for mRNA expression continuous data
    logger.info('Generating expression Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    # Generate metadata for mRNA expression z-score data
    if meta_config.config_map.get('zscores'):
        logger.info('Generating expression Z-Score Meta ...')
        meta_config.datahandler = 'Z-SCORE'
        meta.generate_meta_type(meta_config, study_config, logger)

    logger.info('Finished processing data for CAP_expression pipeline')
Ejemplo n.º 8
0
def main():
    """Run the copy-number steps: SEG fixing, CNA generation and metadata.

    Uses the module-level ``meta_config``, ``study_config`` and ``logger``;
    ``meta_config`` is rebound to the value returned by
    ``helper.relocate_inputs``.  The 'genebed' and 'genelist' paths come from
    ``meta_config.config_map`` when set to a truthy value, otherwise from a
    data directory located relative to ``__file__``.

    After the segment metadata, metadata is generated again for each of
    'DISCRETE' and 'CONTINUOUS' that has a truthy config entry, with
    ``meta_config.datahandler`` set to that name first.
    """
    global meta_config, study_config, janus_path, logger

    import logging
    import os
    from support import helper
    from generate.analysis_pipelines.COPY_NUMBER_ALTERATION.support_functions import fix_chrom, fix_seg_id, preProcCNA, ProcCNA
    from generate import meta
    from utilities.constants import DATA_DIRNAME

    AP_NAME = 'analysis_pipelines'
    CNA_NAME = 'COPY_NUMBER_ALTERATION'
    # config_map keys passed to preProcCNA / ProcCNA, in positional order
    threshold_keys = ('gain', 'ampl', 'htzd', 'hmzd')

    # TODO replace the 'verb' switch with logger
    verbose = logger.isEnabledFor(logging.INFO)

    # note that __file__ is the path to the executing module components.py, not this script
    # The default path is only built when the config has no truthy value.
    genebed = meta_config.config_map.get('genebed') or os.path.join(
        os.path.dirname(__file__), AP_NAME, CNA_NAME, DATA_DIRNAME,
        'ncbi_genes_hg19_canonical.bed')
    genelist = meta_config.config_map.get('genelist') or os.path.join(
        os.path.dirname(__file__), AP_NAME, CNA_NAME, DATA_DIRNAME,
        'targeted_genelist.txt')

    logger.info('Transferring SEG files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, verbose)
    logger.info('Done.')

    logger.info('Fixing Chromosome numbering ...')
    fix_chrom(meta_config, study_config, logger)
    logger.info('Done.')

    logger.info('Fixing .SEG IDs')
    fix_seg_id(meta_config, study_config, logger)
    logger.info('Done.')

    logger.info('Concatenating SEG Files to export folder')
    helper.concat_files(meta_config, study_config, verbose)
    logger.info('Done.')

    # Call preProcCNA.r to generate reduced seg files
    logger.info('Generating reduced SEG files ...')
    preProcCNA(meta_config, study_config, genebed, genelist,
               *[meta_config.config_map[key] for key in threshold_keys],
               logger)
    logger.info('Done.')

    # Thresholds are looked up again here rather than reused from above
    logger.info('Generating CNA and log2CNA files ...')
    ProcCNA(meta_config, study_config, genebed, genelist,
            *[meta_config.config_map[key] for key in threshold_keys],
            meta_config.config_map['oncokb_api_token'], verbose)
    logger.info('Done.')

    # TODO legacy metadata generation left in place for now. But does it make sense for data to be *both* discrete and continuous?
    logger.info('Generating segments Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)
    logger.info('Done.')

    # Optional extra metadata passes, each gated on its own config entry
    optional_passes = (
        ('DISCRETE', 'Generating DISCRETE Meta ...'),
        ('CONTINUOUS', 'Generating CONTINUOUS Meta ...'),
    )
    for handler_name, message in optional_passes:
        if not meta_config.config_map.get(handler_name):
            continue
        logger.info(message)
        meta_config.datahandler = handler_name
        meta.generate_meta_type(meta_config, study_config, logger)
        logger.info('Done.')