Ejemplos de update_dict en Python, ejemplos de multiqc.utils.config.update_dict en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: hooks.py Proyecto: c-BIG/MultiQC_NPM

def before_config():
    """Merge the MultiQC_NPM search patterns into ``config.sp``.

    Builds a fixed table of ``multiqc_npm/*`` search patterns, merges it
    into the MultiQC search-pattern dict with ``config.update_dict`` and
    logs the names of the patterns that were registered.
    """
    # Note: the 'shared' marker is the string 'true', not a boolean.
    shared_flag = 'true'

    npm_patterns = {}
    npm_patterns['multiqc_npm/picard_quality_yield_metrics'] = {
        'fn': '*.quality_yield_metrics.txt',
        'shared': shared_flag,
    }
    npm_patterns['multiqc_npm/samtools_stats_bq'] = {
        'fn': '*.stats',
        'contents': 'This file was produced by samtools stats',
        'shared': shared_flag,
    }
    npm_patterns['multiqc_npm/bcftools_gtcheck'] = {
        'fn': '*.bcftools_gtcheck.txt',
        'shared': shared_flag,
    }
    # The remaining patterns match on file name only and carry no
    # 'shared' entry.
    npm_patterns['multiqc_npm/sg10k_cov_062017'] = {
        'fn': '*.sg10k_cov_062017.txt',
    }
    npm_patterns['multiqc_npm/count_variants'] = {
        'fn': '*.variant_counts.json',
    }
    npm_patterns['multiqc_npm/mosdepth'] = {
        'fn': '*.mosdepth.csv',
    }

    config.update_dict(config.sp, npm_patterns)

    registered_names = ", ".join(npm_patterns)
    log.info("Expanded search patterns with the following: %s",
             registered_names)

Ejemplo n.º 2

0

Mostrar archivo

def load_config():
    """Merge the Sequana module search patterns into ``config.sp``.

    Every pattern consists of a single ``'fn'`` file-name glob. Several
    modules deliberately share the same glob (``sequana_summary*.json``).
    """
    # (search pattern name, file-name glob), in registration order.
    filename_globs = (
        ('sequana_coverage', 'sequana_summary_coverage.json'),
        ('sequana_pacbio_qc', 'sequana_summary*.json'),
        ('sequana_quality_control', 'summary*.json'),
        ('sequana_isoseq_qc', 'sequana_summary*.json'),
        ('sequana_isoseq', 'sequana_summary*.json'),
        ('sequana_kraken', 'sequana_kraken_summary.json'),
        ('sequana_pacbio_amplicon', 'sequana_pacbio_amplicon_*.json'),
        ('sequana_bamtools_stats', 'sequana_bamtools_stats_*.txt'),
    )
    sequana_patterns = {
        sp_name: {'fn': glob} for sp_name, glob in filename_globs
    }
    config.update_dict(config.sp, sequana_patterns)

Ejemplo n.º 3

0

Mostrar archivo

Archivo: custom_code.py Proyecto: massiddamt/gatkdoc_plugin

def gatkdoc_plugin_execution_start():
    """Hook executed after config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    The hook stops immediately when ``config.kwargs['disable_plugin']`` is
    truthy. The lookup defaults to ``True``, so a missing key also stops
    the hook. Otherwise it registers the plugin's search pattern (unless
    one is already present) and adds an extra file-name cleaning suffix.

    Returns:
        None in every case.
    """
    plugin_disabled = config.kwargs.get('disable_plugin', True)
    if plugin_disabled:
        return None

    banner = "Running GATK DepthOfCoverage MultiQC Plugin v{}"
    log.info(banner.format(config.gatkdoc_plugin_version))

    # User config files have already been loaded at this point, so the
    # search pattern is only added when the key is absent; this avoids
    # clobbering a value customised by the user.
    sp_key = 'gatkdoc/key_value_pairs'
    if sp_key not in config.sp:
        config.update_dict(config.sp, {sp_key: {'fn': '*.sample_summary'}})

    # Extra suffix to strip when cleaning file names.
    extra_clean_exts = ['.depthOfCov.COUNT_READS']
    config.fn_clean_exts.extend(extra_clean_exts)

Ejemplo n.º 4

0

Mostrar archivo

def multiqc_ngi_config():
    """Install the MultiQC_NGI defaults on the MultiQC config object.

    Registers the NGI RNA-seq search patterns, selects the ``ngi``
    template, enables pushing to StatusDB, extends the file-name cleaning
    and ignore lists, and initialises the remote-save and general NGI
    options.
    """
    # Module search patterns: (name, file-name glob).
    rnaseq_globs = (
        ('ngi_rnaseq/featureCounts_biotype', '*_biotype_counts.txt'),
        ('ngi_rnaseq/dupradar_intslope', '*intercept_slope.txt'),
        ('ngi_rnaseq/dupradar_gml_intslope', '*_duprateExpDensCurve.txt'),
        ('ngi_rnaseq/heatmap', 'log2CPM_sample_distances.txt'),
        ('ngi_rnaseq/mds_plot', 'edgeR_MDS_plot_coordinates.txt'),
    )
    config.update_dict(
        config.sp,
        {sp_name: {'fn': glob} for sp_name, glob in rnaseq_globs},
    )

    # Use the NGI template by default
    config.template = 'ngi'

    # Push parsed results to StatusDB
    config.push_statusdb = True

    # Additional filename cleaning for NGI pipelines
    ngi_clean_exts = ['.bowtie_log', '.featureCounts']
    config.fn_clean_exts.extend(ngi_clean_exts)

    # Ignore intermediate files from WGS Piper results
    piper_intermediate_dirs = (
        '01_raw_alignments',
        '02_preliminary_alignment_qc',
        '03_genotype_concordance',
        '04_merged_alignments',
    )
    config.fn_ignore_paths.extend(
        ['*/piper_ngi/' + subdir + '/*' for subdir in piper_intermediate_dirs]
    )

    # Saving generated reports remotely on the tools server is off by
    # default, and the connection settings start out unset.
    config.save_remote = False
    for remote_option in ('remote_sshkey', 'remote_port',
                          'remote_destination'):
        setattr(config, remote_option, None)

    # General MultiQC_NGI options
    config.disable_ngi = False

Ejemplo n.º 5

0

Mostrar archivo

def example_plugin_execution_start():
    """Hook executed after config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    The hook stops immediately when ``config.kwargs['disable_plugin']`` is
    truthy. The lookup defaults to ``True``, so a missing key also stops
    the hook. Otherwise it registers the plugin's search patterns (only
    those not already present) and extends the file-name cleaning and
    ignore lists.

    Returns:
        None in every case.
    """
    plugin_disabled = config.kwargs.get('disable_plugin', True)
    if plugin_disabled:
        return None

    banner = "Running Example MultiQC Plugin v{}"
    log.info(banner.format(config.example_plugin_version))

    # User config files have already been loaded at this point, so each
    # search pattern is only added when its key is absent; this avoids
    # clobbering values customised by users.
    default_patterns = (
        ('my_example/key_value_pairs', 'my_plugin_output.tsv'),
        ('my_example/plot_data', 'my_plugin_plotdata.tsv'),
    )
    for sp_key, filename in default_patterns:
        if sp_key not in config.sp:
            config.update_dict(config.sp, {sp_key: {'fn': filename}})

    # Some additional filename cleaning
    extra_clean_exts = ['.my_tool_extension', '.removeMetoo']
    config.fn_clean_exts.extend(extra_clean_exts)

    # Ignore some files generated by the custom pipeline
    ignored_subdirs = ('fake_news', 'red_herrings', 'noisy_data', 'rubbish')
    config.fn_ignore_paths.extend(
        ['*/my_awesome_pipeline/' + subdir + '/*' for subdir in ignored_subdirs]
    )

Ejemplo n.º 6

0

Mostrar archivo

def mga_plugin_execution_start():
    """Hook executed after config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    Logs the plugin version at debug level and registers the ``mga``
    search pattern unless one is already present in ``config.sp``.
    """
    plugin_version = config.mga_plugin_version
    log.debug(
        "Running Multi Genome Alignment MultiQC Plugin v{}".format(
            plugin_version))

    # User config files have already been loaded at this point; an
    # existing 'mga' entry may be a user customisation, so leave it alone.
    if 'mga' in config.sp:
        return

    mga_pattern = {'mga': {'fn': '*.mga.xml'}}
    config.update_dict(config.sp, mga_pattern)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: artic.py Proyecto: will-rowe/artic-mqc

def artic_mqc_execution_start():
    """Hook executed after config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    The hook stops immediately when ``config.kwargs['disable_plugin']`` is
    truthy. The lookup defaults to ``True``, so a missing key also stops
    the hook. Otherwise it registers the ARTIC report search patterns
    (only those not already present) and extends the ignore and file-name
    cleaning lists.

    Returns:
        None in every case.
    """
    plugin_disabled = config.kwargs.get('disable_plugin', True)
    if plugin_disabled:
        return None

    banner = "Running ARTIC pipeline MultiQC Plugin v{}"
    log.info(banner.format(config.artic_version))

    # User config files have already been loaded at this point, so each
    # search pattern is only added when its key is absent; this avoids
    # clobbering values customised by users.
    report_patterns = (
        ('artic_mqc/aligntrim_reports', '*.alignreport.txt'),
        ('artic_mqc/vcfcheck_reports', '*.vcfreport.txt'),
    )
    for sp_key, glob in report_patterns:
        if sp_key not in config.sp:
            config.update_dict(config.sp, {sp_key: {'fn': glob}})

    # Ignore some files generated by the custom pipeline
    ignored_globs = ['*/*.fasta', '*/*.hdf', '*/*primertrimmed*']
    config.fn_ignore_paths.extend(ignored_globs)

    # Some additional filename cleaning
    extra_clean_exts = ['.sorted', '.trimmed', '.rg', '.pass', '.alignreport']
    config.fn_clean_exts.extend(extra_clean_exts)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: multiqc_blr.py Proyecto: pontushojer/MultiQC_BLR

def execution_start():
    """Hook executed after config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    The hook stops immediately when ``config.kwargs['disable_plugin']`` is
    truthy. The lookup defaults to ``True``, so a missing key also stops
    the hook. Otherwise it raises the log file size limit and registers
    the BLR search patterns that are not already present in ``config.sp``.

    Returns:
        None in every case.
    """
    plugin_disabled = config.kwargs.get('disable_plugin', True)
    if plugin_disabled:
        return None

    banner = "Running MultiQC BLR Plugin v{}"
    log.info(banner.format(config.multiqc_blr_version))

    # Increase filesize limit, large log files might be missed otherwise
    config.mqc_add_config({'log_filesize_limit': 100_000_000})

    # (search pattern name, file-name glob, regex for the file contents).
    # Every pattern sets 'num_lines' to 1.
    blr_patterns = (
        # "*.log" files with "SETTINGS FOR:" at the start of the first line
        ('stats', '*.log', '^SETTINGS FOR:*'),
        # "*.txt" files with "switch rate:" at the start of the first line
        ('hapcut2/phasing_stats', '*.txt', '^switch rate:*'),
        # "*.phase" files with "BLOCK:" at the start of the first line
        ('hapcut2/phaseblocks', '*.phase', "^BLOCK:*"),
    )

    # User config files have already been loaded at this point, so each
    # search pattern is only added when its key is absent; this avoids
    # clobbering values customised by users.
    for sp_key, glob, first_line_re in blr_patterns:
        if sp_key in config.sp:
            continue
        config.update_dict(
            config.sp, {
                sp_key: {
                    'fn': glob,
                    'contents_re': first_line_re,
                    'num_lines': 1,
                }
            })

Ejemplo n.º 9

0

Mostrar archivo

def execution_start():
    """Hook executed after config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    Registers the ALFA, TIN score and PCA search patterns, skipping any
    whose key is already present in ``config.sp``.
    """
    # (search pattern name, file-name glob), in registration order.
    module_files = (
        ("ALFA", "*ALFA_feature_counts.tsv"),
        ("tin-score", "TIN_score.tsv"),
        ("zpca/pca", "PCA.tsv"),
        ("zpca/scree", "scree.tsv"),
    )
    for sp_key, filename in module_files:
        if sp_key in config.sp:
            continue
        config.update_dict(config.sp, {sp_key: {"fn": filename}})

Ejemplo n.º 10

0

Mostrar archivo

Archivo: multiqc_umccr.py Proyecto: scwatts/umccrise

    def __init__(self):
        """Load the bundled ``multiqc_config.yaml`` into the MultiQC config.

        The YAML file is read from the directory containing this module
        and its contents are merged into ``config.__dict__`` with
        ``config.update_dict``. An empty YAML file is skipped.
        """
        log.debug('Running config_loaded hook. Loading specific settings and metadata')

        config_path = join(dirname(__file__), 'multiqc_config.yaml')
        with open(config_path) as f:
            # safe_load instead of a bare yaml.load(f): PyYAML >= 6 requires
            # an explicit Loader (bare yaml.load raises TypeError), and older
            # versions could construct arbitrary Python objects from tags.
            cfg = yaml.safe_load(f)

        # An empty YAML document parses to None; there is nothing to merge.
        if cfg:
            config.update_dict(config.__dict__, cfg)

Ejemplo n.º 11

0

Mostrar archivo

def quartet_rnaseq_report_execution_start():
    """Hook executed after config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    No disable flag is consulted; the hook always runs. It registers the
    Quartet RNA-seq search patterns (only those not already present in
    ``config.sp``), then sets the module order, the excluded modules and
    the log file size limit.
    """
    banner = "Running Example MultiQC Plugin v{}"
    log.info(banner.format(config.quartet_rnaseq_report_version))

    # (search pattern name, match field, pattern), in registration order.
    # 'fn_re' entries are regular expressions, 'fn' entries are globs.
    search_patterns = (
        # Module-rnaseq_data_generation_information
        ('rnaseq_data_generation_information/information',
         'fn_re', '^information.json$'),
        # Module-rnaseq_performance_assessment
        ('rnaseq_performance_assessment/quality_score',
         'fn_re', '^quality_score.txt$'),
        ('rnaseq_performance_assessment/performance_of_absolute_exp',
         'fn_re', '^performance_of_absolute_exp.txt$'),
        ('rnaseq_performance_assessment/performance_of_relative_exp',
         'fn_re', '^performance_of_relative_exp.txt$'),
        ('rnaseq_performance_assessment/pca_with_snr',
         'fn_re', '^pca_with_snr.txt$'),
        ('rnaseq_performance_assessment/relative_exp_correlation',
         'fn_re', '^relative_exp_correlation.txt$'),
        ('rnaseq_performance_assessment/absolute_exp_correlation',
         'fn_re', '^absolute_exp_correlation.txt$'),
        ('rnaseq_performance_assessment/qc_metrics_summary',
         'fn_re', '^qc_metrics_summary.txt$'),
        # Module-rnaseq_raw_qc
        ('rnaseq_raw_qc/zip', 'fn', '*_fastqc.zip'),
        ('rnaseq_raw_qc/data', 'fn', 'fastqc_data.txt'),
        ('rnaseq_raw_qc/fastq_screen', 'fn', '*_screen.txt'),
        # Module-post_alignment_qc_modules
        ('rnaseq_post_alignment_qc/bam_qc/genome_results',
         'fn', 'genome_results.txt'),
        ('rnaseq_post_alignment_qc/bam_qc/coverage',
         'fn', 'coverage_histogram.txt'),
        ('rnaseq_post_alignment_qc/bam_qc/insert_size',
         'fn', 'insert_size_histogram.txt'),
        ('rnaseq_post_alignment_qc/bam_qc/gc_dist',
         'fn', 'mapped_reads_gc-content_distribution.txt'),
        ('rnaseq_post_alignment_qc/rnaseq_qc/rnaseq_qc_results',
         'fn', 'rnaseq_qc_results.txt'),
        ('rnaseq_post_alignment_qc/rnaseq_qc/coverage',
         'fn', 'coverage_profile_along_genes_*'),
    )

    # User config files have already been loaded at this point, so each
    # search pattern is only added when its key is absent; this avoids
    # clobbering values customised by users.
    for sp_key, match_field, pattern in search_patterns:
        if sp_key in config.sp:
            continue
        config.update_dict(config.sp, {sp_key: {match_field: pattern}})

    config.module_order = [
        'rnaseq_data_generation_information',
        'rnaseq_performance_assessment',
        'rnaseq_raw_qc',
        'rnaseq_post_alignment_qc',
        'rnaseq_supplementary',
    ]
    config.exclude_modules = ['fastqc', 'fastq_screen', 'qualimap']

    config.log_filesize_limit = 2000000000

Ejemplo n.º 12

0

Mostrar archivo

def UPHL_plugin_execution_start():
    """Hook executed after config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    The hook stops immediately when ``config.kwargs['disable_plugin']`` is
    truthy. The lookup defaults to ``True``, so a missing key also stops
    the hook. Otherwise it registers the UPHL search patterns (only those
    not already present in ``config.sp``) and extends the file-name
    cleaning list.

    Returns:
        None in every case.
    """
    plugin_disabled = config.kwargs.get('disable_plugin', True)
    if plugin_disabled:
        return None

    banner = "Running MultiQC UPHL Plugin v{}"
    log.info(banner.format(config.multiqc_uphl_version))

    # (search pattern name, file-name glob), in registration order.
    module_files = (
        ('mash', '*_mashdist.txt'),
        ('abricate', '*abricate_summary.txt'),
        ('seqyclean', '*SummaryStatistics.tsv'),
        ('cgpipeline', '*cgpipeline.txt'),
        ('blobtools/json', '*.blobDB.json'),
        ('blobtools/stats', '*.blobplot.stats.txt'),
        ('blobtools/table', '*.blobDB.table.txt'),
        ('seqsero', 'Seqsero_result.txt'),
        ('roary/qc', 'qc_report.csv'),
        ('roary/summary', 'summary_statistics.txt'),
        ('roary/gene_presence', 'gene_presence_absence.Rtab'),
        ('roary/conserved_genes', 'number_of_conserved_genes.Rtab'),
        ('roary/total_genes', 'number_of_genes_in_pan_genome.Rtab'),
        ('roary/new_genes', 'number_of_new_genes.Rtab'),
        ('roary/unique_genes', 'number_of_unique_genes.Rtab'),
    )

    # User config files have already been loaded at this point, so each
    # search pattern is only added when its key is absent; this avoids
    # clobbering values customised by users.
    for sp_key, glob in module_files:
        if sp_key in config.sp:
            continue
        config.update_dict(config.sp, {sp_key: {'fn': glob}})

    # Some additional filename cleaning
    extra_clean_exts = [
        '_SummaryStatistics',
        '.abricate_summary',
        '_mashdist',
        'cgpipeline',
        '.blobplot.stats',
        '.blobDB',
        '_clean_PE1.fastq',
    ]
    config.fn_clean_exts.extend(extra_clean_exts)

Ejemplo n.º 13

0

Mostrar archivo

Archivo: custom_code.py Proyecto: clinico-omics/quartet-dnaseq-report

def quartet_dnaseq_report_execution_start():
    """Hook executed after config files and command line flags are parsed.

    This setuptools hook is the earliest that will be able to use custom
    command line flags.

    The hook stops immediately when ``config.kwargs['disable_plugin']`` is
    truthy. The lookup defaults to ``True``, so a missing key also stops
    the hook. Otherwise it registers the Quartet DNA-seq search patterns
    (only those not already present in ``config.sp``), then sets the
    module order, the excluded modules and the log file size limit.

    Returns:
        None in every case.
    """
    plugin_disabled = config.kwargs.get('disable_plugin', True)
    if plugin_disabled:
        return None

    banner = 'Running Quartet DNA MultiQC Plugin v{}'
    log.info(banner.format(config.quartet_dnaseq_report_version))

    # (search pattern name, file-name regex), in registration order.
    # Every pattern is registered under the 'fn_re' field.
    filename_regexes = (
        # Module-data_generation_information
        ('data_generation_information/information',
         r'.*information.json$'),
        # Module-pre_alignment_qc
        ('pre_alignment_qc/summary', r'^pre_alignment.txt$'),
        ('pre_alignment_qc/fastqc_data', r'fastqc_data.txt'),
        ('pre_alignment_qc/fastqc_zip', r'.*_fastqc.zip'),
        ('pre_alignment_qc/fastqc_theoretical_gc',
         r'^fastqc_theoretical_gc_hg38_genome.txt$'),
        # Module-post_alignment_qc
        ('post_alignment_qc/summary', r'^post_alignment.txt$'),
        ('post_alignment_qc/bamqc/genome_results',
         r'^genome_results.txt$'),
        ('post_alignment_qc/bamqc/coverage',
         r'^coverage_histogram.txt$'),
        ('post_alignment_qc/bamqc/insert_size',
         r'^insert_size_histogram.txt$'),
        ('post_alignment_qc/bamqc/genome_fraction',
         r'^genome_fraction_coverage.txt$'),
        ('post_alignment_qc/bamqc/gc_dist',
         r'^mapped_reads_gc-content_distribution.txt$'),
        # Module-variant_calling_qc
        ('variant_calling_qc/snv_indel_summary',
         r'^variants.calling.qc.txt$'),
        ('variant_calling_qc/mendelian_summary', r'.*summary.txt$'),
        ('variant_calling_qc/history', r'^history.txt$'),
    )

    # User config files have already been loaded at this point, so each
    # search pattern is only added when its key is absent; this avoids
    # clobbering values customised by users.
    for sp_key, regex in filename_regexes:
        if sp_key in config.sp:
            continue
        config.update_dict(config.sp, {sp_key: {'fn_re': regex}})

    config.module_order = [
        'data_generation_information',
        'pre_alignment_qc',
        'post_alignment_qc',
        'variant_calling_qc',
        'supplementary',
    ]

    config.exclude_modules = ['fastqc', 'fastq_screen', 'qualimap']

    config.log_filesize_limit = 2000000000

Ejemplo n.º 14

0

Mostrar archivo

Archivo: atacseq_report.py Proyecto: berguner/atacseq_pipeline

def atacseq_report_execution_start():
    """
    Prepare the ATAC-seq report workspace before MultiQC scans for files.

    The original note on this hook states that it runs after the config files
    and command line flags have been parsed, and that it is the earliest
    setuptools hook able to use custom command line flags.

    Steps performed, in order:

    1. Return immediately when ``disable_atacseq_report`` is truthy in
       ``config.kwargs``.
    2. Register the ``atacseq`` and ``atacseq/tss`` search patterns in
       ``config.sp`` unless they are already present.
    3. Symlink the project folder into ``config.public_html_folder``.
    4. Read the sample annotation sheet, create the ``atacseq_report``
       directory under ``config.project_path``, point ``config.output_dir``
       and ``config.analysis_dir`` at it, and symlink every sample's pipeline
       outputs into it.
    5. When ``config.trackhub_dir`` is set, write the UCSC track hub files
       (``genomes.txt``, ``hub.txt``, ``<genome>/trackDb.txt``) and the
       per-sample bigWig links.

    The process working directory is changed several times along the way and
    is left at the report directory on success.

    Returns:
        None.

    Raises:
        SystemExit: with code 1 when ``base_url``/``project_uuid``/
            ``public_html_folder`` or ``sample_annotation`` are missing from
            the configuration, or when the configured track hub directory
            does not exist.
    """
    # Halt execution if we've disabled the plugin.
    # NOTE(review): the default is True, so the hook is a no-op whenever the
    # key is absent from config.kwargs -- confirm the CLI flag always sets it.
    if config.kwargs.get('disable_atacseq_report', True):
        return None

    log.info(
        "Running atacseq_report MultiQC Plugin v{}, use --disable-atacseq-report to disable"
        .format(config.atacseq_report_version))

    # Add to the search patterns used by the atacseq module
    if 'atacseq' not in config.sp:
        config.update_dict(
            config.sp,
            {'atacseq': {'fn': '*.stats.tsv', 'contents': 'frip'}})
        log.info("updated config.sp for atacseq")
    if 'atacseq/tss' not in config.sp:
        config.update_dict(
            config.sp,
            {'atacseq/tss': {'fn': '*TSS.csv', 'contents': 'count'}})

    # Create symlink for the web server
    web_settings = ('base_url', 'project_uuid', 'public_html_folder')
    if not all(hasattr(config, name) for name in web_settings):
        log.error(
            'Please provide base_url, project_uuid and public_html_folder in the configuration file'
        )
        raise SystemExit(1)

    project_url = os.path.join(config.base_url, config.project_uuid)
    os.chdir(config.public_html_folder)
    if not os.path.islink(
            os.path.join(config.public_html_folder, config.project_uuid)):
        # The symlink has to be relative so that the web server can locate
        # the project folder; relpath is resolved against the cwd set above.
        os.symlink(os.path.relpath(config.project_path), config.project_uuid)
    log.info('## You can access the project report from: ##\n{}\n'.format(
        os.path.join(project_url, 'atacseq_report', 'multiqc_report.html')))

    # Setup ATACseq report folder and UCSC track hub
    if not hasattr(config, 'sample_annotation'):
        log.error(
            'Please provide the location of the ATACseq sample annotation sheet in the configuration file'
        )
        raise SystemExit(1)

    samples_dict = _atacseq_read_sample_sheet(config.sample_annotation)
    log.info('There were {} samples in the sample annotation sheet'.format(
        len(samples_dict)))

    report_dir = os.path.join(config.project_path, 'atacseq_report')
    if not os.path.exists(report_dir):
        os.mkdir(report_dir)
    config.output_dir = report_dir
    config.analysis_dir = [report_dir]
    os.chdir(report_dir)

    # Create symbolic links to relevant pipeline output files for use in
    # report generation (links are created relative to the cwd set above).
    _atacseq_link_sample_outputs(samples_dict)

    # Create UCSC track hub
    if hasattr(config, 'trackhub_dir'):
        hub_dir = os.path.join(config.project_path, config.trackhub_dir)
        if not os.path.exists(hub_dir):
            # Abort cleanly: continuing would only fail later inside
            # os.mkdir() with a less helpful traceback.
            log.error('Please make sure that trackhub_dir exists')
            raise SystemExit(1)
        _atacseq_write_trackhub(samples_dict, hub_dir)
    else:
        log.warning('Trackhubs configuration is missing!')

    # Finally, switch back to the report directory for scanning the stats files
    os.chdir(report_dir)


def _atacseq_read_sample_sheet(path):
    """Return ``{sample_name: row}`` for the CSV at *path*; the first row seen for a name wins."""
    samples = {}
    with open(path, 'r') as sheet:
        for row in csv.DictReader(sheet):
            if 'sample_name' in row and row['sample_name'] not in samples:
                samples[row['sample_name']] = row
    return samples


def _atacseq_link_if_missing(target, link_name):
    """Create the symlink *link_name* -> *target* unless a symlink with that name already exists."""
    if not os.path.islink(link_name):
        os.symlink(target, link_name)


def _atacseq_link_sample_outputs(sample_names):
    """Symlink each sample's pipeline output files into the current working directory."""
    # (sub-directory parts below the sample folder, source file name, link name)
    link_specs = (
        ((), '{}.stats.tsv', '{}.stats.tsv'),
        ((), '{}.tss_histogram.csv', '{}_TSS.csv'),
        (('mapped',), '{}.txt', '{}.txt'),
        (('mapped',), '{}.fastp.json', '{}.fastp.json'),
        (('mapped',), '{}.samblaster.log', '{}.samblaster.log'),
        (('mapped',), '{}.samtools_flagstat.log', '{}.samtools_flagstat.log'),
        (('peaks',), '{}.macs2.log', '{}.macs2.log'),
        (('peaks',), '{}_peaks.xls', '{}_peaks.xls'),
    )
    for sample_name in sample_names:
        for subdirs, source_name, link_name in link_specs:
            path_parts = ('../', 'atacseq_results', sample_name) + subdirs + (
                source_name.format(sample_name), )
            _atacseq_link_if_missing(
                os.path.join(*path_parts), link_name.format(sample_name))


def _atacseq_write_trackhub(samples_dict, hub_dir):
    """Write genomes.txt, hub.txt, trackDb.txt and the bigWig links of the UCSC track hub in *hub_dir*."""
    track_dir = os.path.join(hub_dir, config.genome)
    if not os.path.exists(track_dir):
        os.mkdir(track_dir)
    os.chdir(track_dir)

    # Create the bigWig links for the sample coverage tracks
    for sample_name in samples_dict:
        bigwig_name = '{}.bigWig'.format(sample_name)
        _atacseq_link_if_missing(os.path.join('../', bigwig_name), bigwig_name)

    with open(os.path.join(hub_dir, 'genomes.txt'), 'w') as genomes_file:
        genomes_file.write('genome {}\ntrackDb {}/trackDb.txt\n'.format(
            config.genome, config.genome))

    hub_text = [
        'hub {}'.format(config.trackhub_name),
        'shortLabel {}'.format(config.trackhub_name),
        'longLabel {}'.format(config.trackhub_name),
        'genomesFile genomes.txt',
        'email {}\n'.format(config.email)
    ]
    with open(os.path.join(hub_dir, 'hub.txt'), 'w') as hub_file:
        hub_file.write('\n'.join(hub_text))

    colors = [
        '166,206,227', '31,120,180', '51,160,44', '251,154,153', '227,26,28',
        '253,191,111', '255,127,0', '202,178,214', '106,61,154', '177,89,40'
    ]
    color_by = getattr(config, 'trackhub_color_by', None)
    use_group_colors = hasattr(config, 'trackhub_color_by')

    # Give every distinct group value the next palette entry in order of
    # first appearance. The builtin hash() of a string is salted per process,
    # so using it here would reshuffle colours on every run and could map two
    # groups to the same colour even when the palette is large enough.
    group_colors = {}
    if use_group_colors:
        for row in samples_dict.values():
            group = row[color_by]
            if group not in group_colors:
                group_colors[group] = colors[len(group_colors) % len(colors)]

    track_db = [
        'track {}'.format(config.trackhub_name), 'type bigWig',
        'compositeTrack on', 'autoScale on', 'maxHeightPixels 32:32:8',
        'shortLabel {}'.format(config.trackhub_name[:8]),
        'longLabel {}'.format(config.trackhub_name),
        'visibility {}'.format(config.trackhub_visibility), '', ''
    ]
    for sample_name, row in samples_dict.items():
        short_label = sample_name
        if hasattr(config, 'trackhub_short_label_column'):
            short_label = row[config.trackhub_short_label_column]
        track_color = '255,40,0'
        if use_group_colors:
            track_color = group_colors[row[color_by]]
        track_db += [
            'track {}'.format(sample_name),
            'shortLabel {}'.format(short_label),
            'longLabel {}'.format(sample_name),
            'bigDataUrl {}.bigWig'.format(sample_name),
            'parent {} on'.format(config.trackhub_name),
            'type bigWig', 'windowingFunction mean',
            'color {}'.format(track_color), '', ''
        ]

    # The content is fully assembled before the file is opened, so a bad
    # column name cannot leave a truncated trackDb.txt behind.
    with open(os.path.join(track_dir, 'trackDb.txt'), 'w') as trackdb_file:
        trackdb_file.write('\n'.join(track_db))