def main():
    """Run the SEG handler: stage inputs, fix them up, concatenate, write metadata.

    Operates on the module-level globals ``meta_config``, ``study_config`` and
    ``logger``, which must already be defined when this is called.
    ``meta_config`` is rebound to the value returned by
    ``helper.relocate_inputs``; nothing is returned.

    The sequence of steps mirrors the log messages emitted below.
    """
    global meta_config, study_config, janus_path, logger

    import logging
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.COPY_NUMBER_ALTERATION.support_functions import (
        fix_chrom,
        fix_seg_id,
    )

    # TODO replace the 'verb' switch with logger
    info_enabled = logger.isEnabledFor(logging.INFO)

    logger.info('Gathering and decompressing SEG files into temporary folder, and updating config')
    meta_config = helper.relocate_inputs(meta_config, study_config, info_enabled)
    logger.info('Done.')

    # Both fix-up helpers share the same call signature, so run them in order.
    for banner, fixer in (
        ('Fixing Chromosome numbering ...', fix_chrom),
        ('Fixing .SEG IDs', fix_seg_id),
    ):
        logger.info(banner)
        fixer(meta_config, study_config, logger)
        logger.info('Done.')

    logger.info('Concatenating SEG Files to export folder')
    helper.concat_files(meta_config, study_config, info_enabled)
    logger.info('Done.')

    logger.info('Generating segments Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)
    logger.info('Done.')
def main():
    """Run the CAP_mutation handler on MAF inputs.

    Operates on the module-level globals ``meta_config``, ``study_config`` and
    ``logger``, which must already be defined when this is called.
    ``meta_config`` is rebound to the value returned by
    ``helper.relocate_inputs``; nothing is returned.

    Steps, as announced by the log messages: decompress, clean, concatenate,
    generate metadata, filter, annotate, then apply the TGL filter.

    Raises:
        FileNotFoundError: re-raised (after being logged) if the TGL filter
            step cannot read a file.
        KeyError: if one of the filter settings or ``oncokb_api_token`` is
            missing from ``meta_config.config_map``.
    """
    global meta_config, study_config, janus_path, logger

    import logging
    import os  # imported here but not referenced in this function
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.MUTATION_EXTENDED import support_functions

    # TODO replace the 'verb' switch with logger
    info_enabled = logger.isEnabledFor(logging.INFO)

    logger.info('Started processing data for CAP_mutation pipeline')

    # Decompress MAF files to temp folder
    logger.info('Decompressing MAF files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, info_enabled)

    # Clean MAF files
    logger.info('Cleaning MAF Files ...')
    support_functions.clean_head(meta_config, info_enabled)

    # Concatenate MAF files
    logger.info('Concatenating MAF Files to export folder ...')
    helper.concat_files(meta_config, study_config, info_enabled)

    # Generate the meta data files for mutation extended data
    logger.info('Generating MUTATION_EXTENDED Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    # Filtering MAF files; the settings are passed positionally in this order.
    logger.info('Filtering MAF Files ...')
    filter_keys = (
        'Mutation_Type',
        'Filter_Exception',
        'Minimum_Tumour_Depth',
        'Minimum_Tumour_AF',
        'Maximum_gnomAD_AF',
        'Maximum_Local_Freq',
    )
    settings = meta_config.config_map
    support_functions.maf_filter(
        meta_config,
        study_config,
        *[settings[key] for key in filter_keys]
    )

    # oncokb-annotation
    logger.info('Annotating MAF files ...')
    api_token = meta_config.config_map['oncokb_api_token']
    support_functions.oncokb_annotation(meta_config, study_config, api_token, info_enabled)

    # TGL Pipe Filtering
    logger.info('Filtering TGL pipe ...')
    try:
        support_functions.TGL_filter(meta_config, study_config)
    except FileNotFoundError as err:
        # eg. failure to read vep_keep_columns.txt
        logger.error("Cannot read file: {0}".format(err))
        raise

    logger.info('Finished processing data for CAP_mutation pipeline')
def main():
    """Run the MRNA_EXPRESSION handler and, optionally, its z-score step.

    Operates on the module-level globals ``meta_config``, ``study_config`` and
    ``logger``, which must already be defined when this is called.
    ``meta_config`` is rebound to the value returned by
    ``helper.relocate_inputs``; nothing is returned.

    Steps, as announced by the log messages: relocate inputs, alpha-sort each
    file, build the expression matrix, and generate metadata. When the
    ``zscores`` setting in ``meta_config.config_map`` is truthy, z-score data
    and z-score metadata are generated as well; in that case
    ``meta_config.datahandler`` is left set to ``'Z-SCORE'``.

    Raises:
        KeyError: in the z-score branch, if ``output_folder`` is missing from
            ``study_config.config_map`` or the alteration-type/handler key is
            missing from ``config2name_map``.
    """
    global meta_config, study_config, janus_path, logger

    import logging
    import os  # required for os.path.join in the z-score branch
    from constants.constants import config2name_map
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.MRNA_EXPRESSION.support_functions import (
        alpha_sort,
        generate_expression_matrix,
        generate_expression_zscore,
    )

    # TODO replace the 'verb' switch with logger
    verb = logger.isEnabledFor(logging.INFO)

    logger.info('Gathering and decompressing MRNA_EXPRESSION files into temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, verb)

    logger.info('Alpha sorting each file ...')
    alpha_sort(meta_config, verb)

    logger.info('Generating expression matrix ...')
    generate_expression_matrix(meta_config, study_config, verb)

    logger.info('Generating expression Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    # A missing 'zscores' key is treated the same as a falsy value.
    if meta_config.config_map.get('zscores'):
        logger.info('Generating expression Z-Score Data ...')
        output_folder = study_config.config_map['output_folder']
        data_name = config2name_map[meta_config.alterationtype + ":" + meta_config.datahandler]
        generate_expression_zscore(
            meta_config,
            os.path.join(output_folder, 'data_{}.txt'.format(data_name)),
            output_folder,
            False,
            False,
            verb,
        )

        logger.info('Generating expression Z-Score Meta ...')
        # Tweak the config to write zscore metadata; TODO do this more transparently
        meta_config.datahandler = 'Z-SCORE'
        meta.generate_meta_type(meta_config, study_config, logger)
def main():
    """Run the MutectStrelka handler: VCF inputs are converted to MAF and exported.

    Operates on the module-level globals ``meta_config``, ``study_config`` and
    ``logger``, which must already be defined when this is called.
    ``meta_config`` is rebound twice: to the value returned by
    ``helper.relocate_inputs`` and then to the value returned by
    ``support_functions.export2maf``. Nothing is returned.

    Steps, as announced by the log messages: decompress VCFs, verify the dual
    columns, keep only PASS records, export to MAF, generate metadata, clean
    the MAF files and concatenate them into the export folder.
    """
    global meta_config, study_config, janus_path, logger

    import logging
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.MUTATION_EXTENDED import support_functions

    # TODO replace the 'verb' switch with logger
    info_enabled = logger.isEnabledFor(logging.INFO)

    logger.info('Decompressing VCF files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, info_enabled)

    logger.info('Ensuring both columns exist, otherwise adding UNMATCHED column ...')
    support_functions.verify_dual_columns(meta_config, info_enabled)

    logger.info('Filtering for only PASS ...')
    support_functions.filter_vcf_rejects(meta_config, info_enabled)

    # Both messages are emitted before the single export call.
    logger.info('Exporting vcf2maf...')
    logger.info('And deleting .vcf s...')
    meta_config = support_functions.export2maf(meta_config, study_config, info_enabled)

    # Generate the meta data files for mutation extended data
    logger.info('Generating MUTATION_EXTENDED Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    logger.info('Cleaning MAF Files ...')
    support_functions.clean_head(meta_config, info_enabled)

    logger.info('Concating MAF Files to export folder ...')
    helper.concat_files(meta_config, study_config, info_enabled)

    logger.info('Finished processing data for MutectStrelka pipeline')
def main():
    """Run the CAP_expression handler for mRNA expression data.

    Operates on the module-level globals ``meta_config``, ``study_config`` and
    ``logger``, which must already be defined when this is called.
    ``meta_config`` is rebound to the value returned by
    ``helper.relocate_inputs``; nothing is returned.

    Steps, as announced by the log messages: relocate inputs, alpha-sort each
    file, build the expression matrix, run ``preProcRNA`` twice, and generate
    metadata. When the ``zscores`` setting is truthy, z-score and percentile
    data are generated for three inputs (plain, ``_gepcomp`` and ``_tcga``),
    followed by z-score metadata; in that case ``meta_config.datahandler`` is
    left set to ``'Z-SCORE'``.

    The ``genelist`` and ``enscon`` settings override the bundled default
    files when they are present and truthy.

    Raises:
        KeyError: if a required name is missing from ``config2name_map`` or
            ``output_folder`` is missing from ``study_config.config_map``.
    """
    global meta_config, study_config, janus_path, logger

    # imports are moved into the main (and only) method to work with the legacy component class
    import logging
    import os
    from support import helper
    from generate import meta
    from generate.analysis_pipelines.MRNA_EXPRESSION.support_functions import (
        alpha_sort,
        generate_expression_matrix,
        generate_expression_percentile,
        generate_expression_zscore,
        preProcRNA,
    )
    from constants.constants import config2name_map
    from utilities.constants import DATA_DIRNAME

    # TODO replace the 'verb' switch with logger
    verb = logger.isEnabledFor(logging.INFO)

    def data_name(handler):
        # Name fragment used in 'data_<name>.txt' for the current alteration
        # type and the given data handler; looked up lazily at each call site.
        return config2name_map[meta_config.alterationtype + ":" + handler]

    # Configured paths win; otherwise fall back to the files under DATA_DIRNAME.
    genelist = meta_config.config_map.get('genelist') or os.path.join(
        os.path.dirname(__file__), DATA_DIRNAME, 'targeted_genelist.txt')
    enscon = meta_config.config_map.get('enscon') or os.path.join(
        os.path.dirname(__file__), DATA_DIRNAME, 'ensemble_conversion.txt')

    logger.info('Started processing data for CAP_expression pipeline')

    logger.info('Decompressing MRNA_EXPRESSION files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, verb)

    logger.info('Alpha sorting each file ...')
    alpha_sort(meta_config, verb)

    logger.info('Generating expression matrix ...')
    generate_expression_matrix(meta_config, study_config, verb)

    # preProcRNA - generate processed continuous data using the generated
    # expression matrix - one for study and one for study comparison and one
    # for TCGA data
    # NOTE(review): the meaning of the two trailing booleans is defined by
    # preProcRNA and is not visible here.
    preProcRNA(meta_config, study_config,
               '/data_{}_gepcomp.txt'.format(data_name(meta_config.datahandler)),
               enscon, genelist, True, False)
    preProcRNA(meta_config, study_config,
               '/data_{}.txt'.format(data_name(meta_config.datahandler)),
               enscon, genelist, False, True)

    # NOTE(review): every z-score/percentile step below is assumed to be gated
    # on 'zscores' (the percentile steps read the Z-SCORE files) - confirm.
    if meta_config.config_map.get('zscores'):
        output_folder = study_config.config_map['output_folder']

        # Generate the z-scores for mRNA expression data
        logger.info('Generating expression Z-Score Data ...')
        generate_expression_zscore(
            meta_config,
            os.path.join(output_folder,
                         'data_{}.txt'.format(data_name(meta_config.datahandler))),
            output_folder, False, False, verb)

        # Generate the mRNA expression percentile data
        logger.info('Generating expression Percentile Data ...')
        generate_expression_percentile(
            meta_config,
            os.path.join(output_folder, 'data_{}.txt'.format(data_name('Z-SCORE'))),
            output_folder, False, False, verb)

        # Generate the z-scores for mRNA expression comparison data
        logger.info('Generating expression Z-Score comparison Data ...')
        generate_expression_zscore(
            meta_config,
            os.path.join(output_folder,
                         'data_{}_gepcomp.txt'.format(data_name(meta_config.datahandler))),
            output_folder, True, False, verb)

        # Generate the mRNA expression comparison percentile data
        # NOTE(review): this is given the same data_<Z-SCORE>.txt path as the
        # plain percentile step; only the flags differ - confirm intended.
        logger.info('Generating expression Percentile comparison Data ...')
        generate_expression_percentile(
            meta_config,
            os.path.join(output_folder, 'data_{}.txt'.format(data_name('Z-SCORE'))),
            output_folder, True, False, verb)

        # Generate the z-scores for mRNA expression TCGA data
        # Logged through the logger like every other step in this handler.
        logger.info('Generating expression TCGA Z-Score Data ...')
        generate_expression_zscore(
            meta_config,
            os.path.join(output_folder,
                         'data_{}_tcga.txt'.format(data_name(meta_config.datahandler))),
            output_folder, False, True, verb)

        # Generate the TCGA mRNA expression percentile data
        logger.info('Generating expression TCGA Percentile Data ...')
        generate_expression_percentile(
            meta_config,
            os.path.join(output_folder, 'supplementary_data',
                         'data_{}_tcga.txt'.format(data_name('Z-SCORE'))),
            output_folder, False, True, verb)

    # Generate meta data within the handler and not in generator.py
    # Generate metadata for mRNA expression continuous data
    logger.info('Generating expression Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)

    # Generate metadata for mRNA expression z-score data
    if meta_config.config_map.get('zscores'):
        logger.info('Generating expression Z-Score Meta ...')
        meta_config.datahandler = 'Z-SCORE'
        meta.generate_meta_type(meta_config, study_config, logger)

    logger.info('Finished processing data for CAP_expression pipeline')
def main():
    """Run the CAP_CNA handler: SEG clean-up, CNA processing and metadata.

    Operates on the module-level globals ``meta_config``, ``study_config`` and
    ``logger``, which must already be defined when this is called.
    ``meta_config`` is rebound to the value returned by
    ``helper.relocate_inputs``; nothing is returned.

    Steps, as announced by the log messages: transfer SEG files, fix
    chromosome numbering and SEG IDs, concatenate, generate reduced SEG
    files, generate CNA/log2CNA files, then generate metadata. Additional
    metadata is written for ``DISCRETE`` and/or ``CONTINUOUS`` when the
    setting of that name is truthy; each rebinds ``meta_config.datahandler``.

    The ``genebed`` and ``genelist`` settings override the bundled default
    files when they are present and truthy.

    Raises:
        KeyError: if ``gain``, ``ampl``, ``htzd``, ``hmzd`` or
            ``oncokb_api_token`` is missing from ``meta_config.config_map``.
    """
    global meta_config, study_config, janus_path, logger

    import logging
    import os
    from support import helper
    from generate.analysis_pipelines.COPY_NUMBER_ALTERATION.support_functions import (
        fix_chrom,
        fix_seg_id,
        preProcCNA,
        ProcCNA,
    )
    from generate import meta
    from utilities.constants import DATA_DIRNAME

    AP_NAME = 'analysis_pipelines'
    CNA_NAME = 'COPY_NUMBER_ALTERATION'

    # TODO replace the 'verb' switch with logger
    info_enabled = logger.isEnabledFor(logging.INFO)

    def bundled(filename):
        # note that __file__ is the path to the executing module components.py, not this script
        return os.path.join(os.path.dirname(__file__), AP_NAME, CNA_NAME, DATA_DIRNAME, filename)

    # Configured paths win; the bundled path is only computed when needed.
    genebed = meta_config.config_map.get('genebed') or bundled('ncbi_genes_hg19_canonical.bed')
    genelist = meta_config.config_map.get('genelist') or bundled('targeted_genelist.txt')

    logger.info('Transferring SEG files to temporary folder')
    meta_config = helper.relocate_inputs(meta_config, study_config, info_enabled)
    logger.info('Done.')

    # Both fix-up helpers share the same call signature, so run them in order.
    for banner, fixer in (
        ('Fixing Chromosome numbering ...', fix_chrom),
        ('Fixing .SEG IDs', fix_seg_id),
    ):
        logger.info(banner)
        fixer(meta_config, study_config, logger)
        logger.info('Done.')

    logger.info('Concatenating SEG Files to export folder')
    helper.concat_files(meta_config, study_config, info_enabled)
    logger.info('Done.')

    # Call preProcCNA.r to generate reduced seg files
    logger.info('Generating reduced SEG files ...')
    settings = meta_config.config_map
    preProcCNA(meta_config, study_config, genebed, genelist,
               settings['gain'], settings['ampl'], settings['htzd'], settings['hmzd'],
               logger)
    logger.info('Done.')

    logger.info('Generating CNA and log2CNA files ...')
    settings = meta_config.config_map  # re-read rather than reuse the earlier alias
    ProcCNA(meta_config, study_config, genebed, genelist,
            settings['gain'], settings['ampl'], settings['htzd'], settings['hmzd'],
            settings['oncokb_api_token'], info_enabled)
    logger.info('Done.')

    # TODO legacy metadata generation left in place for now. But does it make
    # sense for data to be *both* discrete and continuous?
    logger.info('Generating segments Meta ...')
    meta.generate_meta_type(meta_config, study_config, logger)
    logger.info('Done.')

    # The setting name doubles as the datahandler value for the extra metadata.
    for handler, banner in (
        ('DISCRETE', 'Generating DISCRETE Meta ...'),
        ('CONTINUOUS', 'Generating CONTINUOUS Meta ...'),
    ):
        if not meta_config.config_map.get(handler):
            continue
        logger.info(banner)
        meta_config.datahandler = handler
        meta.generate_meta_type(meta_config, study_config, logger)
        logger.info('Done.')