def proc_args(argv):
    """Parse the Seq2C command line and build the run configuration.

    The positional arguments are interpreted in one of two ways:

    * a single argument that does not end with ``.bam`` is verified as a file
      and passed to ``read_samples``, which yields sample names and BAM paths;
    * anything else is passed to ``find_bams``.

    When no positional argument is given, the usage text is printed and the
    process exits with status 1.

    NOTE(review): ``argv`` is accepted but never read; options are parsed
    from ``sys.argv`` by ``OptionParser.parse_args()``.

    Returns:
        A tuple ``(cnf, samples, target_bed, output_dir)``. ``samples`` is a
        list of ``source.TargQC_Sample`` sorted by ``key_to_sort()``;
        ``target_bed`` is the result of ``verify_bed`` or ``None`` when no
        BED was configured; ``output_dir`` is ``cnf.output_dir``.
    """
    info(' '.join(sys.argv))
    info()

    # NOTE(review): the wording mentions target QC reports although this
    # entry point sets proc_name to 'Seq2C' -- confirm the intended text.
    description = (
        'This script generates target QC reports for each BAM provided as an input. '
        'Usage: ' + basename(__file__) +
        ' sample2bam.tsv --bed target.bed --controls sample1:sample2 -o results_dir')

    parser = OptionParser(description=description, usage=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('-o', dest='output_dir', metavar='DIR',
                      default=join(os.getcwd(), 'seq2c'))
    parser.add_option('--bed', dest='bed', help='BED file to run Seq2C analysis')
    parser.add_option('-c', '--controls', dest='controls',
                      help='Optional control sample names for Seq2C. '
                           'For multiple controls, separate them using :')
    parser.add_option('--seq2c-opts', dest='seq2c_opts',
                      help='Options for the final lr2gene.pl script.')
    parser.add_option('--no-prep-bed', dest='prep_bed', help=SUPPRESS_HELP,
                      action='store_false', default=True)

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if not args:
        parser.print_usage()
        sys.exit(1)

    if len(args) == 1 and not args[0].endswith('.bam'):
        # A lone non-BAM argument is the sample-to-BAM table.
        sample_names, bam_fpaths = read_samples(
            verify_file(args[0], is_critical=True, description='Input sample2bam.tsv'))
        bam_by_sample = OrderedDict(zip(sample_names, bam_fpaths))
    else:
        bam_by_sample = find_bams(args)

    # Without a BED file the run is configured as whole-genome.
    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)
    check_genome_resources(cnf)

    cnf.output_dir = adjust_path(cnf.output_dir)
    # Only the parent directory has to exist already; the output directory
    # itself is created here.
    verify_dir(dirname(cnf.output_dir), is_critical=True)
    safe_mkdir(cnf.output_dir)

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'Seq2C'
    set_up_dirs(cnf)

    samples = [
        source.TargQC_Sample(name=s_name, dirpath=join(cnf.output_dir, s_name), bam=bam_fpath)
        for s_name, bam_fpath in bam_by_sample.items()]
    # Samples are listed in input order and sorted afterwards.
    info('Samples: ')
    for s in samples:
        info(' ' + s.name)
    samples.sort(key=lambda _s: _s.key_to_sort())

    target_bed = verify_bed(cnf.bed, is_critical=True) if cnf.bed else None

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            critical('Error: qsub-runner is not provided in sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    return cnf, samples, target_bed, cnf.output_dir
def proc_opts():
    """Parse the fastq pre-processing command line and set up the work area.

    Positional arguments are fastq file paths. Each one goes through
    ``verify_file`` and falsy results are dropped. When no work directory is
    configured, one is chosen under ``<output_dir>/work``: either the
    ``latest`` entry (when ``cnf.reuse_intermediate`` is set) or a new
    timestamped directory, in which case ``latest`` is re-created as a
    symlink to it.

    Returns:
        A tuple ``(cnf, cnf.output_dir, fastq_fpaths)``.
    """
    parser = OptionParser()
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option(
        '--expose-only', dest='expose_to_ngs_server_only', action='store_true',
        default=False, help='Only add project to the webserver')
    parser.add_option(
        '--no-expose', dest='expose', action='store_false',
        default=True, help='Do not expose the reports')
    parser.add_option('-o', dest='output_dir')
    parser.add_option(
        '--bed', dest='bed',
        help='BED file to run targetSeq and Seq2C analysis on.')
    parser.add_option('--downsample-to', dest='downsample_to', type='int')

    opts, args = parser.parse_args()
    logger.is_debug = opts.debug

    if not args:
        critical('Usage: ' + __file__ + ' *.fq.gz -o output_dir')

    # Keep only the inputs for which verify_file returned a truthy value.
    fastq_fpaths = [
        checked for checked in [verify_file(arg) for arg in args] if checked]
    info(str(len(fastq_fpaths)) + ' fastq files')

    cnf = Config(opts.__dict__, determine_sys_cnf(opts), determine_run_cnf(opts))

    cnf.output_dir = adjust_path(cnf.output_dir)
    info('Writing to ' + str(cnf.output_dir))

    cnf.project_name = cnf.project_name or 'preproc'

    if not cnf.work_dir:
        work_root = join(cnf.output_dir, 'work')
        safe_mkdir(work_root)
        latest_link = join(work_root, 'latest')

        if cnf.reuse_intermediate:
            cnf.work_dir = latest_link
        else:
            stamp = datetime.datetime.now().strftime("%Y-%b-%d_%H-%M")
            cnf.work_dir = join(work_root, stamp)
            # Clear whatever currently occupies "latest" (link or real
            # directory), then point it at the new directory by base name.
            if islink(latest_link):
                os.remove(latest_link)
            if isdir(latest_link):
                shutil.rmtree(latest_link)
            if not exists(latest_link):
                os.symlink(basename(cnf.work_dir), latest_link)
    else:
        # An explicitly configured work directory turns the debug flag on.
        cnf.debug = True

    cnf.work_dir = adjust_path(cnf.work_dir)
    safe_mkdir(cnf.work_dir)

    cnf.log_dir = join(cnf.work_dir, 'log')
    safe_mkdir(cnf.log_dir)
    set_up_log(cnf)

    # Best effort: an OSError from launching chmod is logged, not raised.
    try:
        subprocess.call(['chmod', '-R', 'g+w', cnf.work_dir])
    except OSError:
        err(traceback.format_exc())

    if cnf.samplesheet:
        cnf.samplesheet = verify_file(cnf.samplesheet, is_critical=True)

    info(' '.join(sys.argv))
    info()
    info('Created a temporary working directory: ' + cnf.work_dir)
    if cnf.project_name:
        info('Project name: ' + cnf.project_name)
    if cnf.samplesheet:
        info('Using custom sample sheet ' + cnf.samplesheet)

    check_genome_resources(cnf)
    check_system_resources(cnf, optional=['fastq'])

    return cnf, cnf.output_dir, fastq_fpaths
def main():
    """Command-line entry point: run TargQC on the BAM files given as arguments.

    Positional arguments are BAM paths. They are made absolute and
    de-duplicated in the order given, then each is checked with
    ``verify_bam``; any failure is reported through ``critical``. After the
    configuration is built, ``run_targqc`` is called, and if it returns a
    truthy report path that path is sent with ``send_email``.
    """
    info(' '.join(sys.argv))
    info()

    description = 'This script generates target QC reports for each BAM provided as an input.'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)
    parser.add_option('--work-dir', dest='work_dir', metavar='DIR')
    parser.add_option('--log-dir', dest='log_dir')
    parser.add_option('--only-summary', dest='only_summary', action='store_true')
    parser.add_option('-o', dest='output_dir', metavar='DIR',
                      default=join(os.getcwd(), 'targetqc'))
    parser.add_option('--reannotate', dest='reannotate', action='store_true', default=False,
                      help='re-annotate BED file with gene names')
    parser.add_option('--dedup', dest='dedup', action='store_true', default=False,
                      help='count duplicates in coverage metrics')
    parser.add_option('--bed', dest='bed',
                      help='BED file to run targetSeq and Seq2C analysis on.')
    parser.add_option('--exons', '--exome', '--features', dest='features',
                      help='Annotated CDS/Exon/Gene/Transcripts BED file to make '
                           'targetSeq exon/amplicon regions reports.')

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if not args:
        critical('No BAMs provided to input.')

    # De-duplicate while keeping command-line order, so the processing order
    # and the order of paths in error messages are predictable.
    seen_fpaths = set()
    bam_fpaths = []
    for arg in args:
        fpath = abspath(arg)
        if fpath not in seen_fpaths:
            seen_fpaths.add(fpath)
            bam_fpaths.append(fpath)

    bad_bam_fpaths = [fpath for fpath in bam_fpaths if not verify_bam(fpath)]
    if bad_bam_fpaths:
        critical('BAM files cannot be found, empty or not BAMs: ' + ', '.join(bad_bam_fpaths))

    # Without a BED file the run is configured as whole-genome.
    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'TargQC'
    set_up_dirs(cnf)

    check_genome_resources(cnf)

    # NOTE(review): the BED is verified as critical even though a missing
    # --bed selects is_wgs above -- confirm how verify_bed treats None.
    verify_bed(cnf.bed, is_critical=True)
    bed_fpath = adjust_path(cnf.bed)
    info('Using amplicons/capture panel ' + bed_fpath)

    # An explicit --features file takes precedence over the genome default.
    features_bed_fpath = adjust_path(cnf.features) if cnf.features \
        else adjust_path(cnf.genome.features)
    info('Features: ' + features_bed_fpath)

    genes_fpath = None
    if cnf.genes:
        genes_fpath = adjust_path(cnf.genes)
        info('Custom genes list: ' + genes_fpath)

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            critical('Error: qsub-runner is not provided in sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    info('*' * 70)
    info()

    targqc_html_fpath = run_targqc(
        cnf, cnf.output_dir, bam_fpaths, bed_fpath, features_bed_fpath, genes_fpath)
    if targqc_html_fpath:
        send_email(cnf, 'TargQC report for ' + cnf.project_name + ':\n ' + targqc_html_fpath)
def proc_args(argv):
    """Parse the variants command line and build the run configuration.

    Positional arguments are passed, together with ``cnf.caller``, to
    ``read_samples``; the result is iterated with ``.items()`` as
    sample name -> VCF path pairs.

    NOTE(review): ``argv`` is accepted but never read; options are parsed
    from ``sys.argv`` by ``OptionParser.parse_args()``.

    Returns:
        A tuple ``(cnf, samples)`` where ``samples`` is a list of
        ``source.VarSample`` sorted by ``key_to_sort()``.
    """
    info(' '.join(sys.argv))
    info()

    # NOTE(review): the description mentions BAM inputs, while the error
    # message below expects VCF files -- confirm the intended wording.
    description = 'This script generates target QC reports for each BAM provided as an input.'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('--log-dir', dest='log_dir')
    parser.add_option('--is-wgs', dest='is_wgs', action='store_true', default=False,
                      help='whole genome sequencing')
    parser.add_option('--is-deep-seq', dest='is_deep_seq', action='store_true', default=False,
                      help='deep targeted sequencing')
    parser.add_option('--only-summary', dest='only_summary', action='store_true')
    parser.add_option('-o', dest='output_dir', metavar='DIR',
                      default=join(os.getcwd(), 'targetqc'))
    parser.add_option('-c', '--caller', dest='caller')
    # --qc and --no-qc share one destination.
    parser.add_option('--qc', dest='qc', action='store_true', default=False)
    parser.add_option('--no-qc', dest='qc', action='store_false', default=False)
    parser.add_option('--qc-caption', dest='qc_caption', help=SUPPRESS_HELP)
    parser.add_option('--no-tsv', dest='tsv', action='store_false', default=True,
                      help=SUPPRESS_HELP)

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if not args:
        critical('No vcf files provided to input.')

    run_cnf = determine_run_cnf(opts, is_targetseq=opts.is_deep_seq, is_wgs=opts.is_wgs)
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    vcf_fpath_by_sample = read_samples(args, cnf.caller)
    info()

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'Variants'
    set_up_dirs(cnf)
    # The command line is logged a second time after set_up_dirs.
    # NOTE(review): presumably so it also reaches a log set up there -- confirm.
    info(' '.join(sys.argv))

    samples = [
        source.VarSample(s_name, join(cnf.output_dir, s_name), vcf=vcf_fpath)
        for s_name, vcf_fpath in vcf_fpath_by_sample.items()
    ]
    samples.sort(key=lambda _s: _s.key_to_sort())

    check_genome_resources(cnf)

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            critical('Error: qsub-runner is not provided in sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    return cnf, samples