def before_config():
    """Register the multiqc_npm search patterns.

    The patterns are handed to ``config.update_dict`` together with
    ``config.sp``; the names of the registered pattern keys are then logged.
    """
    npm_patterns = {
        'multiqc_npm/picard_quality_yield_metrics': {
            'fn': '*.quality_yield_metrics.txt',
            'shared': 'true',
        },
        'multiqc_npm/samtools_stats_bq': {
            'fn': '*.stats',
            'contents': 'This file was produced by samtools stats',
            'shared': 'true',
        },
        'multiqc_npm/bcftools_gtcheck': {
            'fn': '*.bcftools_gtcheck.txt',
            'shared': 'true',
        },
        'multiqc_npm/sg10k_cov_062017': {
            'fn': '*.sg10k_cov_062017.txt',
        },
        'multiqc_npm/count_variants': {
            'fn': '*.variant_counts.json',
        },
        'multiqc_npm/mosdepth': {
            'fn': '*.mosdepth.csv',
        },
    }
    config.update_dict(config.sp, npm_patterns)

    # Iterating a dict yields its keys, in insertion order.
    registered_names = ", ".join(npm_patterns)
    log.info("Expanded search patterns with the following: %s",
             registered_names)
def load_config():
    """Register the filename search patterns of the sequana modules.

    Every entry maps a search-pattern key to the filename glob stored under
    its ``'fn'`` field; the resulting dict is passed to ``config.update_dict``
    together with ``config.sp``.
    """
    filename_globs = (
        ('sequana_coverage', 'sequana_summary_coverage.json'),
        ('sequana_pacbio_qc', 'sequana_summary*.json'),
        ('sequana_quality_control', 'summary*.json'),
        ('sequana_isoseq_qc', 'sequana_summary*.json'),
        ('sequana_isoseq', 'sequana_summary*.json'),
        ('sequana_kraken', 'sequana_kraken_summary.json'),
        ('sequana_pacbio_amplicon', 'sequana_pacbio_amplicon_*.json'),
        ('sequana_bamtools_stats', 'sequana_bamtools_stats_*.txt'),
    )
    sequana_patterns = {
        pattern_key: {'fn': filename_glob}
        for pattern_key, filename_glob in filename_globs
    }
    config.update_dict(config.sp, sequana_patterns)
def gatkdoc_plugin_execution_start():
    """Hook run after the config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    Returns None in every case; when the plugin is disabled nothing is
    logged or registered.
    """
    # The lookup defaults to True, so the plugin is also treated as disabled
    # when 'disable_plugin' is absent from config.kwargs.
    plugin_disabled = config.kwargs.get('disable_plugin', True)
    if plugin_disabled:
        return None

    banner = "Running GATK DepthOfCoverage MultiQC Plugin v{}".format(
        config.gatkdoc_plugin_version)
    log.info(banner)

    # User config files have already been loaded at this point, so only
    # register the search pattern when the key is not set yet. This avoids
    # clobbering values that have been customised by users.
    pattern_key = 'gatkdoc/key_value_pairs'
    if pattern_key not in config.sp:
        config.update_dict(config.sp,
                           {pattern_key: {'fn': '*.sample_summary'}})

    # Some additional filename cleaning
    extra_clean_exts = ['.depthOfCov.COUNT_READS']
    config.fn_clean_exts.extend(extra_clean_exts)
def multiqc_ngi_config():
    """Set up the MultiQC config defaults for this package.

    Registers the ngi_rnaseq search patterns, then assigns the NGI-specific
    defaults (template, StatusDB push, filename cleaning, ignored paths and
    remote-save options) on the ``config`` object.
    """
    # Module search patterns: (search pattern key, filename glob)
    ngi_globs = (
        ('ngi_rnaseq/featureCounts_biotype', '*_biotype_counts.txt'),
        ('ngi_rnaseq/dupradar_intslope', '*intercept_slope.txt'),
        ('ngi_rnaseq/dupradar_gml_intslope', '*_duprateExpDensCurve.txt'),
        ('ngi_rnaseq/heatmap', 'log2CPM_sample_distances.txt'),
        ('ngi_rnaseq/mds_plot', 'edgeR_MDS_plot_coordinates.txt'),
    )
    config.update_dict(
        config.sp,
        {pattern_key: {'fn': glob} for pattern_key, glob in ngi_globs},
    )

    # Use the NGI template by default
    config.template = 'ngi'

    # Push parsed results to StatusDB
    config.push_statusdb = True

    # Additional filename cleaning for NGI pipelines
    ngi_clean_exts = ['.bowtie_log', '.featureCounts']
    config.fn_clean_exts.extend(ngi_clean_exts)

    # Ignore intermediate files from WGS Piper results
    piper_intermediate_dirs = (
        '01_raw_alignments',
        '02_preliminary_alignment_qc',
        '03_genotype_concordance',
        '04_merged_alignments',
    )
    config.fn_ignore_paths.extend(
        ['*/piper_ngi/' + step_dir + '/*'
         for step_dir in piper_intermediate_dirs])

    # Saving generated reports remotely on the tools server is off by
    # default, and the connection details start out unset.
    config.save_remote = False
    for remote_option in ('remote_sshkey', 'remote_port',
                          'remote_destination'):
        setattr(config, remote_option, None)

    # General MultiQC_NGI options
    config.disable_ngi = False
def example_plugin_execution_start():
    """Hook run after the config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    Returns None in every case; when the plugin is disabled nothing is
    logged or registered.
    """
    # The lookup defaults to True, so the plugin is also treated as disabled
    # when 'disable_plugin' is absent from config.kwargs.
    if config.kwargs.get('disable_plugin', True):
        return None

    banner = "Running Example MultiQC Plugin v{}".format(
        config.example_plugin_version)
    log.info(banner)

    # User config files have already been loaded at this point, so each
    # search pattern is only added when its key is not set yet. This avoids
    # clobbering values that have been customised by users.
    default_globs = (
        ('my_example/key_value_pairs', 'my_plugin_output.tsv'),
        ('my_example/plot_data', 'my_plugin_plotdata.tsv'),
    )
    for pattern_key, filename_glob in default_globs:
        if pattern_key in config.sp:
            continue
        config.update_dict(config.sp, {pattern_key: {'fn': filename_glob}})

    # Some additional filename cleaning
    config.fn_clean_exts.extend(['.my_tool_extension', '.removeMetoo'])

    # Ignore some files generated by the custom pipeline
    ignored_subdirs = ('fake_news', 'red_herrings', 'noisy_data', 'rubbish')
    config.fn_ignore_paths.extend(
        ['*/my_awesome_pipeline/' + subdir + '/*'
         for subdir in ignored_subdirs])
def mga_plugin_execution_start():
    """Hook run after the config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.
    """
    version_message = "Running Multi Genome Alignment MultiQC Plugin v{}".format(
        config.mga_plugin_version)
    log.debug(version_message)

    # User config files have already been loaded at this point; leave an
    # existing 'mga' search pattern alone so that values customised by users
    # are not clobbered.
    if 'mga' in config.sp:
        return

    mga_pattern = {'mga': {'fn': '*.mga.xml'}}
    config.update_dict(config.sp, mga_pattern)
def artic_mqc_execution_start():
    """Hook run after the config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    Returns None in every case; when the plugin is disabled nothing is
    logged or registered.
    """
    # The lookup defaults to True, so the plugin is also treated as disabled
    # when 'disable_plugin' is absent from config.kwargs.
    if config.kwargs.get('disable_plugin', True):
        return None

    banner = "Running ARTIC pipeline MultiQC Plugin v{}".format(
        config.artic_version)
    log.info(banner)

    # User config files have already been loaded at this point, so each
    # search pattern is only added when its key is not set yet. This avoids
    # clobbering values that have been customised by users.
    report_globs = (
        ('artic_mqc/aligntrim_reports', '*.alignreport.txt'),
        ('artic_mqc/vcfcheck_reports', '*.vcfreport.txt'),
    )
    for pattern_key, filename_glob in report_globs:
        if pattern_key not in config.sp:
            config.update_dict(config.sp,
                               {pattern_key: {'fn': filename_glob}})

    # Ignore some files generated by the custom pipeline
    pipeline_noise = ['*/*.fasta', '*/*.hdf', '*/*primertrimmed*']
    config.fn_ignore_paths.extend(pipeline_noise)

    # Some additional filename cleaning
    sample_name_suffixes = ['.sorted', '.trimmed', '.rg', '.pass',
                            '.alignreport']
    config.fn_clean_exts.extend(sample_name_suffixes)
def execution_start():
    """Hook run after the config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    Returns None in every case; when the plugin is disabled nothing is
    logged or registered.
    """
    # The lookup defaults to True, so the plugin is also treated as disabled
    # when 'disable_plugin' is absent from config.kwargs.
    if config.kwargs.get('disable_plugin', True):
        return None

    banner = "Running MultiQC BLR Plugin v{}".format(
        config.multiqc_blr_version)
    log.info(banner)

    # Increase filesize limit, large log files might be missed otherwise
    config.mqc_add_config({'log_filesize_limit': 100_000_000})

    # (search pattern key, filename glob, regex for the file contents).
    # Every pattern sets 'num_lines' to 1.
    first_line_patterns = (
        # Files ending with ".log" whose content matches "SETTINGS FOR:"
        ('stats', '*.log', '^SETTINGS FOR:*'),
        # ".txt" files whose content matches "switch rate:"
        ('hapcut2/phasing_stats', '*.txt', '^switch rate:*'),
        # ".phase" files whose content matches "BLOCK:"
        ('hapcut2/phaseblocks', '*.phase', "^BLOCK:*"),
    )

    # User config files have already been loaded at this point, so each
    # search pattern is only added when its key is not set yet. This avoids
    # clobbering values that have been customised by users.
    for pattern_key, filename_glob, contents_regex in first_line_patterns:
        if pattern_key in config.sp:
            continue
        config.update_dict(
            config.sp,
            {
                pattern_key: {
                    'fn': filename_glob,
                    'contents_re': contents_regex,
                    'num_lines': 1,
                }
            },
        )
def execution_start():
    """Hook run after the config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.
    """
    # (search pattern key, filename glob) used by the modules.
    module_globs = (
        ("ALFA", "*ALFA_feature_counts.tsv"),
        ("tin-score", "TIN_score.tsv"),
        ("zpca/pca", "PCA.tsv"),
        ("zpca/scree", "scree.tsv"),
    )

    # A key that is already present in config.sp is left untouched.
    for pattern_key, filename_glob in module_globs:
        if pattern_key in config.sp:
            continue
        config.update_dict(config.sp, {pattern_key: {"fn": filename_glob}})
def __init__(self):
    """Load ``multiqc_config.yaml`` and merge it into the MultiQC config.

    The YAML file is read from the directory that contains this module and
    its parsed content is passed to ``config.update_dict`` together with
    ``config.__dict__``.

    Raises:
        OSError: if the YAML file cannot be opened.
        yaml.YAMLError: if the file is not valid YAML.
    """
    log.debug('Running config_loaded hook. Loading specific settings and metadata')
    config_path = join(dirname(__file__), 'multiqc_config.yaml')
    with open(config_path) as f:
        # safe_load instead of a bare yaml.load(f): calling yaml.load without
        # a Loader can construct arbitrary Python objects and is rejected
        # with a TypeError by PyYAML >= 6.
        # An empty file parses to None; fall back to an empty mapping.
        cfg = yaml.safe_load(f) or {}
    config.update_dict(config.__dict__, cfg)
def quartet_rnaseq_report_execution_start():
    """Hook run after the config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    Registers the search patterns of the Quartet RNA-seq report modules and
    then assigns ``config.module_order``, ``config.exclude_modules`` and
    ``config.log_filesize_limit``. Those three assignments are unconditional,
    i.e. they replace whatever value was set before.
    """
    # The disable_plugin guard is commented out, so this hook always runs:
    # if config.kwargs.get('disable_plugin', True):
    #     return None

    # The message used to read "Running Example MultiQC Plugin", a leftover
    # from the example plugin; it now names this plugin.
    log.info("Running Quartet RNA MultiQC Plugin v{}".format(
        config.quartet_rnaseq_report_version))

    # (search pattern key, pattern field, pattern value)
    # NOTE(review): the dots in the 'fn_re' regexes are unescaped and thus
    # match any character - confirm whether that is intended.
    search_patterns = (
        # Module-rnaseq_data_generation_information
        ('rnaseq_data_generation_information/information',
         'fn_re', '^information.json$'),
        # Module-rnaseq_performance_assessment
        ('rnaseq_performance_assessment/quality_score',
         'fn_re', '^quality_score.txt$'),
        ('rnaseq_performance_assessment/performance_of_absolute_exp',
         'fn_re', '^performance_of_absolute_exp.txt$'),
        ('rnaseq_performance_assessment/performance_of_relative_exp',
         'fn_re', '^performance_of_relative_exp.txt$'),
        ('rnaseq_performance_assessment/pca_with_snr',
         'fn_re', '^pca_with_snr.txt$'),
        ('rnaseq_performance_assessment/relative_exp_correlation',
         'fn_re', '^relative_exp_correlation.txt$'),
        ('rnaseq_performance_assessment/absolute_exp_correlation',
         'fn_re', '^absolute_exp_correlation.txt$'),
        ('rnaseq_performance_assessment/qc_metrics_summary',
         'fn_re', '^qc_metrics_summary.txt$'),
        # Module-rnaseq_raw_qc
        ('rnaseq_raw_qc/zip', 'fn', '*_fastqc.zip'),
        ('rnaseq_raw_qc/data', 'fn', 'fastqc_data.txt'),
        ('rnaseq_raw_qc/fastq_screen', 'fn', '*_screen.txt'),
        # Module-post_alignment_qc_modules
        ('rnaseq_post_alignment_qc/bam_qc/genome_results',
         'fn', 'genome_results.txt'),
        ('rnaseq_post_alignment_qc/bam_qc/coverage',
         'fn', 'coverage_histogram.txt'),
        ('rnaseq_post_alignment_qc/bam_qc/insert_size',
         'fn', 'insert_size_histogram.txt'),
        ('rnaseq_post_alignment_qc/bam_qc/gc_dist',
         'fn', 'mapped_reads_gc-content_distribution.txt'),
        ('rnaseq_post_alignment_qc/rnaseq_qc/rnaseq_qc_results',
         'fn', 'rnaseq_qc_results.txt'),
        ('rnaseq_post_alignment_qc/rnaseq_qc/coverage',
         'fn', 'coverage_profile_along_genes_*'),
    )

    # User config files have already been loaded at this point, so each
    # search pattern is only added when its key is not set yet. This avoids
    # clobbering values that have been customised by users.
    for pattern_key, pattern_field, pattern_value in search_patterns:
        if pattern_key not in config.sp:
            config.update_dict(
                config.sp, {pattern_key: {pattern_field: pattern_value}})

    config.module_order = [
        'rnaseq_data_generation_information',
        'rnaseq_performance_assessment',
        'rnaseq_raw_qc',
        'rnaseq_post_alignment_qc',
        'rnaseq_supplementary',
    ]
    config.exclude_modules = ['fastqc', 'fastq_screen', 'qualimap']
    config.log_filesize_limit = 2000000000
def UPHL_plugin_execution_start():
    """Hook run after the config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    Returns None in every case; when the plugin is disabled nothing is
    logged or registered.
    """
    # The lookup defaults to True, so the plugin is also treated as disabled
    # when 'disable_plugin' is absent from config.kwargs.
    if config.kwargs.get('disable_plugin', True):
        return None

    banner = "Running MultiQC UPHL Plugin v{}".format(
        config.multiqc_uphl_version)
    log.info(banner)

    # (search pattern key, filename glob) used by the modules.
    module_globs = (
        ('mash', '*_mashdist.txt'),
        ('abricate', '*abricate_summary.txt'),
        ('seqyclean', '*SummaryStatistics.tsv'),
        ('cgpipeline', '*cgpipeline.txt'),
        ('blobtools/json', '*.blobDB.json'),
        ('blobtools/stats', '*.blobplot.stats.txt'),
        ('blobtools/table', '*.blobDB.table.txt'),
        ('seqsero', 'Seqsero_result.txt'),
        ('roary/qc', 'qc_report.csv'),
        ('roary/summary', 'summary_statistics.txt'),
        ('roary/gene_presence', 'gene_presence_absence.Rtab'),
        ('roary/conserved_genes', 'number_of_conserved_genes.Rtab'),
        ('roary/total_genes', 'number_of_genes_in_pan_genome.Rtab'),
        ('roary/new_genes', 'number_of_new_genes.Rtab'),
        ('roary/unique_genes', 'number_of_unique_genes.Rtab'),
    )

    # User config files have already been loaded at this point, so each
    # search pattern is only added when its key is not set yet. This avoids
    # clobbering values that have been customised by users.
    for pattern_key, filename_glob in module_globs:
        if pattern_key in config.sp:
            continue
        config.update_dict(config.sp, {pattern_key: {'fn': filename_glob}})

    # Some additional filename cleaning
    sample_name_suffixes = [
        '_SummaryStatistics',
        '.abricate_summary',
        '_mashdist',
        'cgpipeline',
        '.blobplot.stats',
        '.blobDB',
        '_clean_PE1.fastq',
    ]
    config.fn_clean_exts.extend(sample_name_suffixes)
def quartet_dnaseq_report_execution_start():
    """Hook run after the config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    Registers the filename regexes of the Quartet DNA-seq report modules and
    then assigns ``config.module_order``, ``config.exclude_modules`` and
    ``config.log_filesize_limit`` (unconditionally). Returns None in every
    case; when the plugin is disabled nothing is logged or changed.
    """
    # The lookup defaults to True, so the plugin is also treated as disabled
    # when 'disable_plugin' is absent from config.kwargs.
    if config.kwargs.get('disable_plugin', True):
        return None

    banner = 'Running Quartet DNA MultiQC Plugin v{}'.format(
        config.quartet_dnaseq_report_version)
    log.info(banner)

    # (search pattern key, filename regex stored under 'fn_re')
    # NOTE(review): the dots in these regexes are unescaped and thus match
    # any character - confirm whether that is intended.
    filename_regexes = (
        # Module-data_generation_information
        ('data_generation_information/information',
         r'.*information.json$'),
        # Module-pre_alignment_qc
        ('pre_alignment_qc/summary', r'^pre_alignment.txt$'),
        ('pre_alignment_qc/fastqc_data', r'fastqc_data.txt'),
        ('pre_alignment_qc/fastqc_zip', r'.*_fastqc.zip'),
        ('pre_alignment_qc/fastqc_theoretical_gc',
         r'^fastqc_theoretical_gc_hg38_genome.txt$'),
        # Module-post_alignment_qc
        ('post_alignment_qc/summary', r'^post_alignment.txt$'),
        ('post_alignment_qc/bamqc/genome_results',
         r'^genome_results.txt$'),
        ('post_alignment_qc/bamqc/coverage',
         r'^coverage_histogram.txt$'),
        ('post_alignment_qc/bamqc/insert_size',
         r'^insert_size_histogram.txt$'),
        ('post_alignment_qc/bamqc/genome_fraction',
         r'^genome_fraction_coverage.txt$'),
        ('post_alignment_qc/bamqc/gc_dist',
         r'^mapped_reads_gc-content_distribution.txt$'),
        # Module-variant_calling_qc
        ('variant_calling_qc/snv_indel_summary',
         r'^variants.calling.qc.txt$'),
        ('variant_calling_qc/mendelian_summary', r'.*summary.txt$'),
        ('variant_calling_qc/history', r'^history.txt$'),
    )

    # User config files have already been loaded at this point, so each
    # search pattern is only added when its key is not set yet. This avoids
    # clobbering values that have been customised by users.
    for pattern_key, filename_regex in filename_regexes:
        if pattern_key in config.sp:
            continue
        config.update_dict(config.sp,
                           {pattern_key: {'fn_re': filename_regex}})

    config.module_order = [
        'data_generation_information',
        'pre_alignment_qc',
        'post_alignment_qc',
        'variant_calling_qc',
        'supplementary',
    ]
    config.exclude_modules = ['fastqc', 'fastq_screen', 'qualimap']
    config.log_filesize_limit = 2000000000
def _atacseq_link_if_missing(source_path, link_name):
    """Create the symlink ``link_name`` -> ``source_path`` unless it is already a symlink."""
    if not os.path.islink(link_name):
        os.symlink(source_path, link_name)


def _atacseq_read_sample_annotation(annotation_path):
    """Return ``{sample_name: row}`` from the CSV sheet, keeping the first row per sample."""
    samples = {}
    with open(annotation_path, 'r') as sheet:
        for row in csv.DictReader(sheet):
            if 'sample_name' in row:
                # setdefault keeps the first row seen for a sample name.
                samples.setdefault(row['sample_name'], row)
    return samples


def _atacseq_link_pipeline_outputs(sample_names):
    """Symlink the per-sample pipeline output files into the current directory."""
    # (sub-directories below the sample's result folder,
    #  source file name template, link name template)
    linked_outputs = (
        ((), '{}.stats.tsv', '{}.stats.tsv'),
        ((), '{}.tss_histogram.csv', '{}_TSS.csv'),
        (('mapped',), '{}.txt', '{}.txt'),
        (('mapped',), '{}.fastp.json', '{}.fastp.json'),
        (('mapped',), '{}.samblaster.log', '{}.samblaster.log'),
        (('mapped',), '{}.samtools_flagstat.log', '{}.samtools_flagstat.log'),
        (('peaks',), '{}.macs2.log', '{}.macs2.log'),
        (('peaks',), '{}_peaks.xls', '{}_peaks.xls'),
    )
    for sample_name in sample_names:
        for subdirs, source_template, link_template in linked_outputs:
            source_path = os.path.join(
                '../', 'atacseq_results', sample_name, *subdirs,
                source_template.format(sample_name))
            _atacseq_link_if_missing(source_path,
                                     link_template.format(sample_name))


def _atacseq_write_trackhub(hub_dir, samples):
    """Write genomes.txt, hub.txt and trackDb.txt plus the bigWig links below ``hub_dir``.

    Changes the working directory to the genome sub-directory of ``hub_dir``.
    """
    track_dir = os.path.join(hub_dir, config.genome)
    if not os.path.exists(track_dir):
        os.mkdir(track_dir)
    os.chdir(track_dir)

    # Create the bigWig links for the sample coverage tracks
    for sample_name in samples:
        bigwig_name = '{}.bigWig'.format(sample_name)
        _atacseq_link_if_missing(os.path.join('../', bigwig_name),
                                 bigwig_name)

    with open(os.path.join(hub_dir, 'genomes.txt'), 'w') as genomes_file:
        genomes_file.write('genome {}\ntrackDb {}/trackDb.txt\n'.format(
            config.genome, config.genome))

    hub_lines = [
        'hub {}'.format(config.trackhub_name),
        'shortLabel {}'.format(config.trackhub_name),
        'longLabel {}'.format(config.trackhub_name),
        'genomesFile genomes.txt',
        'email {}\n'.format(config.email),
    ]
    with open(os.path.join(hub_dir, 'hub.txt'), 'w') as hub_file:
        hub_file.write('\n'.join(hub_lines))

    colors = [
        '166,206,227', '31,120,180', '51,160,44', '251,154,153',
        '227,26,28', '253,191,111', '255,127,0', '202,178,214',
        '106,61,154', '177,89,40',
    ]
    color_by_group = hasattr(config, 'trackhub_color_by')
    group_index = {}
    if color_by_group:
        # Number the distinct group values in order of first appearance.
        # The colour is picked by this index instead of hash(value): string
        # hashes are randomised per interpreter run, which made the track
        # colours change from one run to the next.
        for sample_name in samples:
            group = samples[sample_name][config.trackhub_color_by]
            group_index.setdefault(group, len(group_index))
    custom_short_label = hasattr(config, 'trackhub_short_label_column')

    track_db = [
        'track {}'.format(config.trackhub_name),
        'type bigWig',
        'compositeTrack on',
        'autoScale on',
        'maxHeightPixels 32:32:8',
        'shortLabel {}'.format(config.trackhub_name[:8]),
        'longLabel {}'.format(config.trackhub_name),
        'visibility {}'.format(config.trackhub_visibility),
        '',
        '',
    ]
    for sample_name, row in samples.items():
        short_label = sample_name
        if custom_short_label:
            short_label = row[config.trackhub_short_label_column]
        track_color = '255,40,0'
        if color_by_group:
            # The palette is reused cyclically when there are more groups
            # than colours.
            group = row[config.trackhub_color_by]
            track_color = colors[group_index[group] % len(colors)]
        track_db += [
            'track {}'.format(sample_name),
            'shortLabel {}'.format(short_label),
            'longLabel {}'.format(sample_name),
            'bigDataUrl {}.bigWig'.format(sample_name),
            'parent {} on'.format(config.trackhub_name),
            'type bigWig',
            'windowingFunction mean',
            'color {}'.format(track_color),
            '',
            '',
        ]

    trackdb_file_path = os.path.join(hub_dir, config.genome, 'trackDb.txt')
    with open(trackdb_file_path, 'w') as trackdb_file:
        trackdb_file.write('\n'.join(track_db))


def atacseq_report_execution_start():
    """Hook run after the config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    Steps, in order:
      1. register the 'atacseq' and 'atacseq/tss' search patterns,
      2. symlink the project folder into ``config.public_html_folder``,
      3. read the sample annotation sheet, create the 'atacseq_report'
         folder, point ``config.output_dir`` / ``config.analysis_dir`` at it
         and symlink the per-sample pipeline outputs into it,
      4. write the UCSC track hub files when ``config.trackhub_dir`` is set.

    The working directory is changed along the way and is the report folder
    when the function finishes.

    Returns:
        None. When 'disable_atacseq_report' is set (or absent from
        ``config.kwargs``) nothing is done.

    Raises:
        SystemExit: with code 1 when base_url / project_uuid /
            public_html_folder or sample_annotation are not configured, or
            when the configured trackhub_dir does not exist.
    """
    # Halt execution if we've disabled the plugin
    if config.kwargs.get('disable_atacseq_report', True):
        return None

    log.info(
        "Running atacseq_report MultiQC Plugin v{}, use --disable-atacseq-report to disable"
        .format(config.atacseq_report_version))

    # Add to the search patterns used by atacseq module
    if 'atacseq' not in config.sp:
        config.update_dict(
            config.sp,
            {'atacseq': {'fn': '*.stats.tsv', 'contents': 'frip'}})
        log.info("updated config.sp for atacseq")
    if 'atacseq/tss' not in config.sp:
        config.update_dict(
            config.sp,
            {'atacseq/tss': {'fn': '*TSS.csv', 'contents': 'count'}})

    # Create symlink for the web server
    web_settings = ('base_url', 'project_uuid', 'public_html_folder')
    if not all(hasattr(config, setting) for setting in web_settings):
        log.error(
            'Please provide base_url, project_uuid and public_html_folder in the configuration file'
        )
        # Same effect as the builtin exit(1), without relying on the helper
        # that the site module injects.
        raise SystemExit(1)
    project_url = os.path.join(config.base_url, config.project_uuid)
    os.chdir(config.public_html_folder)
    # The working directory is the public html folder now, so the link is
    # checked and created by its bare name; this also works when
    # public_html_folder is a relative path.
    if not os.path.islink(config.project_uuid):
        # The symlink has to be relative so that the web server can locate
        # the project folder
        os.symlink(os.path.relpath(config.project_path), config.project_uuid)
    log.info('## You can access the project report from: ##\n{}\n'.format(
        os.path.join(project_url, 'atacseq_report', 'multiqc_report.html')))

    # Setup ATACseq report folder and UCSC track hub
    if not hasattr(config, 'sample_annotation'):
        log.error(
            'Please provide the location of the ATACseq sample annotation sheet in the configuration file'
        )
        raise SystemExit(1)
    samples_dict = _atacseq_read_sample_annotation(config.sample_annotation)
    log.info('There were {} samples in the sample annotation sheet'.format(
        len(samples_dict)))

    report_dir = os.path.join(config.project_path, 'atacseq_report')
    if not os.path.exists(report_dir):
        os.mkdir(report_dir)
    config.output_dir = report_dir
    config.analysis_dir = [report_dir]
    os.chdir(report_dir)

    # Create symbolic links to relevant pipeline output files for use in
    # report generation
    _atacseq_link_pipeline_outputs(samples_dict)

    # Create UCSC track hub
    if hasattr(config, 'trackhub_dir'):
        hub_dir = os.path.join(config.project_path, config.trackhub_dir)
        if not os.path.exists(hub_dir):
            log.error('Please make sure that trackhub_dir exists')
            # Stop here like the other misconfiguration branches instead of
            # failing later when the genome sub-directory is created.
            raise SystemExit(1)
        _atacseq_write_trackhub(hub_dir, samples_dict)
    else:
        log.warning('Trackhubs configuration is missing!')

    # Finally, switch back to the report directory for scanning the stats
    # files
    os.chdir(report_dir)