def _symlink_vcfs(callers, datestamp_var_dirpath):
    errory = []
    for caller in callers:
        info(caller.name)
        for sample in caller.samples:
            info(sample.name)
            filt_vcf_fpath = sample.find_filt_vcf_by_callername(caller.name)
            if not verify_file(filt_vcf_fpath):
                errory.append([sample.name, caller.name, filt_vcf_fpath])
            else:
                base_filt_fpath = filt_vcf_fpath[:-3] if filt_vcf_fpath.endswith('.gz') else filt_vcf_fpath
                for fpath in [base_filt_fpath + '.gz',
                              base_filt_fpath + '.idx',
                              base_filt_fpath + '.gz.tbi']:
                    if verify_file(fpath, silent=True):
                        _symlink_to_dir(fpath, sample.dirpath)
                        # _symlink_to_dir(fpath, datestamp_var_dirpath)
                BCBioStructure.move_vcfs_to_var(sample)
    return errory
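

# Illustrative, standalone sketch (not used by the pipeline) of the companion-file
# convention applied in _symlink_vcfs above: the '.gz' suffix is stripped from the
# filtered VCF path to get a base path, and any '.gz', '.idx' and '.gz.tbi' companions
# that exist next to it are symlinked into a target directory. Names are hypothetical.
#
#     import os
#     from os.path import basename, isfile, join
#
#     def _example_symlink_companions(filt_vcf_fpath, target_dirpath):
#         base = filt_vcf_fpath[:-3] if filt_vcf_fpath.endswith('.gz') else filt_vcf_fpath
#         for fpath in [base + '.gz', base + '.idx', base + '.gz.tbi']:
#             if isfile(fpath):
#                 dst = join(target_dirpath, basename(fpath))
#                 if not os.path.lexists(dst):
#                     os.symlink(fpath, dst)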


def main():
    info(' '.join(sys.argv))
    info()
    description = 'This script converts Vardict TXT files to VCF.'
    parser = OptionParser(
        description=description,
        usage='Usage: ' + basename(__file__) +
              ' [-o Output_directory -c Var_caller_name] Project_directory')
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('--log-dir', dest='log_dir', default='-')
    parser.add_option('-c', '--caller', dest='caller_name', default='vardict')
    parser.add_option('-o', dest='output_dir', help='Output directory.')

    cnf, bcbio_project_dirpaths, bcbio_cnfs, final_dirpaths, tags, is_wgs_in_bcbio, is_rnaseq = \
        process_post_bcbio_args(parser)

    if not bcbio_project_dirpaths:
        parser.print_help(file=sys.stderr)
        sys.exit(1)

    bcbio_structures = []
    for bcbio_project_dirpath, bcbio_cnf, final_dirpath in zip(
            bcbio_project_dirpaths, bcbio_cnfs, final_dirpaths):
        bs = BCBioStructure(cnf, bcbio_project_dirpath, bcbio_cnf, final_dirpath)
        bcbio_structures.append(bs)

    cnf.work_dir = cnf.work_dir or adjust_path(join(cnf.output_dir, 'work'))
    safe_mkdir(cnf.work_dir)

    info('')
    info('*' * 70)
    for bs in bcbio_structures:
        for sample in bs.samples:
            if sample.phenotype != 'normal':
                convert_vardict_txts_to_bcbio_vcfs(cnf, bs, sample)
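

# Example invocation (hypothetical paths), following the usage string defined above:
# the script is pointed at a bcbio project directory and converts VarDict TXT calls
# for all non-normal samples.
#
#   python <this_script>.py /ngs/bcbio/my_project -o /ngs/bcbio/my_project/vcfs -c vardict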


def main():
    info(' '.join(sys.argv))
    info()
    description = 'This script evaluates the capture target.'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('--log-dir', dest='log_dir')
    parser.add_option('--exac-only-filtering', dest='prepare_for_exac', action='store_true', default=False,
                      help='Export filtered regions to ExAC browser.')
    parser.add_option('--exac', dest='add_to_exac', action='store_true', default=False,
                      help='Export coverage data to ExAC browser.')
    parser.add_option('--bed', '--capture', '--amplicons', dest='bed', help='BED file to overlap.')
    parser.add_option('--tricky-regions', dest='tricky_regions', action='store_true', default=False,
                      help='Use high GC, low GC, low complexity regions to overlap.')
    parser.add_option('--min-percent', dest='min_percent', default='0.5',
                      help='Minimal percent of region which has low coverage.')
    parser.add_option('--min-ratio', dest='min_ratio', default='0.5',
                      help='Minimal percent of samples which share the same feature.')
    parser.add_option('--min-depth', dest='min_depth', help='Coverage threshold.')
    parser.add_option('--metadata', dest='metadata',
                      help='Sample type for each project (plasma, cell_line, ffpe, deepseq, exome, wgs).')
    parser.add_option('-o', dest='output_dir', help='Output directory.')

    cnf, bcbio_project_dirpaths, bcbio_cnfs, final_dirpaths, tags, is_wgs_in_bcbio, is_rnaseq = \
        process_post_bcbio_args(parser)

    if not cnf.project_name:
        cnf.add_to_exac = False
        cnf.project_name = 'CaptureTargetEvaluation'

    if cnf.prepare_for_exac:
        cnf.output_dir = join(get_exac_dir(cnf), 'coverage', cnf.project_name)
    elif cnf.output_dir is None:
        cnf.output_dir = join(os.getcwd(), cnf.project_name)
    cnf.output_dir = safe_mkdir(adjust_path(cnf.output_dir))

    cnf.work_dir = safe_mkdir(join(cnf.output_dir, 'work'))
    cnf.log_dir = safe_mkdir(join(cnf.work_dir, 'log'))

    cnf.min_percent = 1 - float(cnf.min_percent)
    cnf.min_ratio = float(cnf.min_ratio)
    if cnf.min_depth:
        cnf.min_depth = int(cnf.min_depth)
    if cnf.metadata:
        cov_thresholds = {'deepseq': 250, 'plasma': 100, 'exome': 20,
                          'ffpe': 10, 'cell_line': 10, 'wgs': 10}
        cnf.min_depths = [cov_thresholds[type] for type in cnf.metadata.split(',')]

    if len(bcbio_project_dirpaths) < 1:
        critical('Usage: ' + __file__ + ' project_bcbio_path [project_bcbio_path] [-o output_dir]')

    info()
    info('*' * 70)

    safe_mkdir(cnf.output_dir)
    if cnf.log_dir:
        info('log_dirpath: ' + cnf.log_dir)
        safe_mkdir(cnf.log_dir)
        set_up_log(cnf, 'evaluate_capture_target', cnf.project_name, cnf.output_dir)

    bcbio_structures = []
    for bcbio_project_dirpath, bcbio_cnf, final_dirpath in zip(
            bcbio_project_dirpaths, bcbio_cnfs, final_dirpaths):
        bs = BCBioStructure(cnf, bcbio_project_dirpath, bcbio_cnf, final_dirpath)
        bcbio_structures.append(bs)

    cnf.work_dir = cnf.work_dir or adjust_path(join(cnf.output_dir, 'work'))
    safe_mkdir(cnf.work_dir)

    info('')
    info('*' * 70)
    regions_fpath = evaluate_capture(cnf, bcbio_structures)

    if cnf.add_to_exac:
        if not is_us():
            err('Exposing to ExAC browser is available only on US server')
            return
        output_dirpath = join(get_exac_dir(cnf), 'coverage', cnf.project_name)
        safe_mkdir(output_dirpath)
        if regions_fpath and regions_fpath != join(output_dirpath, basename(regions_fpath)):
            shutil.copy(regions_fpath, join(output_dirpath, basename(regions_fpath)))
            shutil.copy(regions_fpath + '.tbi', join(output_dirpath, basename(regions_fpath + '.tbi')))
        samples = []
        sample_names = [s.name for bs in bcbio_structures for s in bs.samples]
        for bs in bcbio_structures:
            for sample in bs.samples:
                sample.name = get_uniq_sample_key(bs.project_name, sample, sample_names)
                samples.append(sample)
        calculate_coverage_use_grid(cnf, samples, output_dirpath)
        add_project_to_exac(cnf)
    else:
        info('Use --exac if you want to export data to ExAC browser')
    info('Done.')
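

# Example invocation (hypothetical paths), following the usage message above. --metadata
# maps each listed project to a sample type, which selects a per-project coverage
# threshold from cov_thresholds:
#
#   python <this_script>.py /ngs/bcbio/project_1 /ngs/bcbio/project_2 \
#       --metadata exome,plasma --min-percent 0.5 --min-ratio 0.5 -o /ngs/capture_eval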


def main():
    description = 'This script runs reporting suite on the bcbio final directory.'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('--load-mongo', '--mongo-loader', dest='load_mongo', action='store_true',
                      default=defaults['load_mongo'], help='Load to Mongo DB')
    parser.add_option('--datahub-path', dest='datahub_path',
                      help='DataHub directory path to upload final MAFs and CNV (can be remote).')
    parser.add_option('--email', dest='email',
                      help='E-mail address to send notifications on errors and finished jobs.')
    parser.add_option('--reannotate', dest='reannotate', action='store_true', default=False,
                      help='Re-annotate BED file with gene names')
    parser.add_option('--extended', dest='extended', action='store_true', default=False,
                      help='Count flagged regions and missed variants')
    parser.add_option('--dedup', dest='dedup', action='store_true', default=False,
                      help='Count duplicates in coverage metrics')
    parser.add_option('--seq2c-opts', dest='seq2c_opts', help='Options for the final lr2gene.pl script.')
    parser.add_option('--seq2c-controls', dest='seq2c_controls', help='Additional controls for Seq2C.')
    parser.add_option('--deep-seq', dest='deep_seq', action='store_true', default=False,
                      help='Use run_info_DeepSeq.yaml')
    parser.add_option('--wgs', dest='is_wgs', action='store_true', default=None,
                      help='Ignore sv_regions and run as WGS')
    parser.add_option('--only-summary', dest='only_summary', action='store_true', default=False,
                      help='Only generate project-level report')
    parser.add_option('--jira', dest='jira', help='JIRA case path')
    parser.add_option('--bed', '--capture', '--amplicons', dest='bed',
                      help='BED file to run targetSeq and Seq2C analysis on.')
    parser.add_option('--exons', '--exome', dest='exons',
                      help='Exons BED file to make targetSeq exon/amplicon regions reports.')
    parser.add_option('--no-prep-bed', dest='prep_bed', action='store_false', default=True,
                      help='do not fix input beds and exons')
    parser.add_option('--no-dedup', dest='no_dedup', action='store_true', help=SUPPRESS_HELP)
    parser.add_option('-f', '--freq', '--min-freq', dest='min_freq', type='float',
                      help='Minimum allele frequency for the filtering.')
    parser.add_option('-o', dest='output_dir', help='Output directory for report combining.')
    parser.add_option('--transcripts', dest='transcripts_fpath', help='Transcripts for annotation.')
    parser.add_option('--no-bam2bigwig', dest='no_bam2bigwig', action='store_true', default=False,
                      help=SUPPRESS_HELP)

    cnf, bcbio_project_dirpaths, bcbio_cnfs, final_dirpaths, tags, is_wgs_in_bcbio, is_rnaseq = \
        process_post_bcbio_args(parser)
    is_wgs = cnf.is_wgs = cnf.is_wgs or is_wgs_in_bcbio

    cnf.run_date = time.localtime()

    cnf_project_name = cnf.project_name
    if len(bcbio_project_dirpaths) > 1:
        cnf.project_name = None

    info()
    info('*' * 70)
    bcbio_structures = []
    for bcbio_project_dirpath, bcbio_cnf, final_dirpath in zip(
            bcbio_project_dirpaths, bcbio_cnfs, final_dirpaths):
        bs = BCBioStructure(cnf, bcbio_project_dirpath, bcbio_cnf, final_dirpath,
                            is_wgs=is_wgs, is_rnaseq=is_rnaseq)
        bcbio_structures.append(bs)

    # Post-processing one bcbio project as usual
    if len(bcbio_structures) == 1:
        if cnf.min_freq is not None:
            info('Min freq for filtering is %f' % cnf.min_freq)

        if cnf.steps and cnf.load_mongo and 'MongoLoader' not in cnf.steps:
            cnf.steps.append('MongoLoader')

        check_system_resources(cnf, required=['qsub'])

        bcbio_structure = bcbio_structures[0]
        bcbio_runner = BCBioRunner(cnf, bcbio_structure, cnf.bcbio_cnf)
        bcbio_runner.post_jobs()

    # Special case: multiple projects in input. They are not post-processed; instead,
    # their summary reports are combined together.
    elif len(bcbio_structures) > 1:
        if cnf_project_name:
            cnf.project_name = cnf_project_name
        else:
            cnf.project_name = '_'.join([bs.project_name for bs in bcbio_structures])
        if not cnf.output_dir:
            cnf.output_dir = join(os.getcwd(), cnf.project_name)
        safe_mkdir(cnf.output_dir)

        cnf.log_dir = join(cnf.output_dir, 'log')
        info('log_dirpath: ' + cnf.log_dir)
        safe_mkdir(cnf.log_dir)
        set_up_log(cnf, 'multiple_projects', cnf.project_name, cnf.output_dir)

        cnf.work_dir = adjust_path(join(cnf.output_dir, 'work'))
        safe_mkdir(cnf.work_dir)
        safe_mkdir(adjust_path(join(cnf.output_dir, 'config')))

        combine_projects(cnf, bcbio_structures, tags)


def combine_projects(cnf, bcbio_structures, tags=None):
    tag_by_sample = dict()
    if tags:
        for bs, tag in zip(bcbio_structures, tags):
            for s in bs.samples:
                tag_by_sample[s.name] = tag or bs.project_name
    # else:
    #     for bs in bcbio_structures:
    #         for s in bs.sampels:
    #             tag_by_sample[s.name] = bs.project_name

    final_dirpath = adjust_path(join(cnf.output_dir, 'final'))
    safe_mkdir(final_dirpath)
    merged_bcbio_cnf = merge_bcbio_yamls(cnf, bcbio_structures)
    samples = [s for bs in bcbio_structures for s in bs.samples]

    dirs_to_reprocess = [source.clinreport_dir, BCBioStructure.var_dir,
                         source.varannotate_name, source.varfilter_name]
    for s in samples:
        sample_dir = join(final_dirpath, s.name)
        sample_var_dirpath = join(sample_dir, BCBioStructure.var_dir)
        safe_mkdir(sample_var_dirpath)
        for file_or_dir in os.listdir(s.dirpath):
            if file_or_dir not in dirs_to_reprocess:
                safe_symlink_to(join(s.dirpath, file_or_dir), sample_dir)
        for file in os.listdir(s.var_dirpath):
            safe_symlink_to(join(s.var_dirpath, file), sample_var_dirpath)

    merged_date_dir = join(final_dirpath,
                           merged_bcbio_cnf['fc_date'] + '_' + merged_bcbio_cnf['fc_name'])
    merged_bs_var_dirpath = join(merged_date_dir, BCBioStructure.var_dir)
    merged_bs_raw_var_dirpath = join(merged_bs_var_dirpath, 'raw')
    safe_mkdir(merged_bs_raw_var_dirpath)
    for bs in bcbio_structures:
        for file in os.listdir(bs.raw_var_dirpath):
            safe_symlink_to(join(bs.raw_var_dirpath, file), merged_bs_raw_var_dirpath)

    variants_fpaths = []
    vardict_txt_fname = variant_filtering.mut_fname_template.format(caller_name='vardict')
    variants_fpath = join(merged_bs_var_dirpath, vardict_txt_fname)
    pass_variants_fpath = add_suffix(variants_fpath, variant_filtering.mut_pass_suffix)
    reject_variants_fpath = add_suffix(variants_fpath, variant_filtering.mut_reject_suffix)

    cnf.steps = ['Variants']
    # Re-filtering: perform cohort-based filtering only within sub-projects
    for bs_i, bs in enumerate(bcbio_structures):
        correct_bs = BCBioStructure(cnf, cnf.output_dir, bs.bcbio_cnf, final_dirpath)
        bcbio_runner = BCBioRunner(cnf, correct_bs, bs.bcbio_cnf)
        bcbio_runner.post_jobs()

        bs_raw_variants_fpath = add_suffix(variants_fpath, str(bs_i))
        pass_bs_variants_fpath = add_suffix(bs_raw_variants_fpath, variant_filtering.mut_pass_suffix)
        reject_bs_variants_fpath = add_suffix(bs_raw_variants_fpath, variant_filtering.mut_reject_suffix)
        shutil.move(variants_fpath, bs_raw_variants_fpath)
        shutil.move(pass_variants_fpath, pass_bs_variants_fpath)
        shutil.move(reject_variants_fpath, reject_bs_variants_fpath)
        variants_fpaths.append(bs_raw_variants_fpath)

    merged_bs = BCBioStructure(cnf, cnf.output_dir, merged_bcbio_cnf, final_dirpath)
    merged_samples = [s for s in merged_bs.samples]
    cnf.variant_filtering.max_ratio = 1
    combine_results(cnf, merged_samples, variants_fpaths, variants_fpath,
                    pass_variants_fpath=pass_variants_fpath)

    for variants_fpath in variants_fpaths:
        safe_remove(variants_fpath)
        pass_fpath = add_suffix(variants_fpath, variant_filtering.mut_pass_suffix)
        safe_remove(pass_fpath)
        reject_fpath = add_suffix(variants_fpath, variant_filtering.mut_reject_suffix)
        safe_remove(reject_fpath)

    cnf.reuse_intermediate = True
    cnf.steps = ['Seq2C', 'Summary']
    BCBioRunner(cnf, merged_bs, merged_bs.bcbio_cnf).post_jobs()


def main():
    info(' '.join(sys.argv))
    info()
    description = 'This script makes paired WGS-target clinical reports based on 2 bcbio projects.'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)
    parser.add_option('--email', dest='email',
                      help='E-mail address to send notifications on errors and finished jobs.')
    parser.add_option('--jira', dest='jira', help='JIRA case path')
    parser.add_option('--bed', '--capture', '--amplicons', dest='bed',
                      help='BED file to run targetSeq and Seq2C analysis on.')
    parser.add_option('--exons', '--exome', dest='exons',
                      help='Exons BED file to make targetSeq exon/amplicon regions reports.')
    parser.add_option('-o', dest='output_dir', help='Output directory for report combining.')

    cnf, bcbio_project_dirpaths, bcbio_cnfs, final_dirpaths, tags, is_wgs_in_bcbio, is_rnaseq = \
        process_post_bcbio_args(parser)
    is_wgs = cnf.is_wgs = cnf.is_wgs or is_wgs_in_bcbio

    if len(bcbio_project_dirpaths) != 2:
        critical('Usage: ' + __file__ + ' wgs_project_project_bcbio_path '
                 'targetseq_project_bcbio_path [-o output_dir]')

    info()
    info('*' * 70)
    bcbio_structures = []
    for bcbio_project_dirpath, bcbio_cnf, final_dirpath in zip(
            bcbio_project_dirpaths, bcbio_cnfs, final_dirpaths):
        bs = BCBioStructure(cnf, bcbio_project_dirpath, bcbio_cnf, final_dirpath,
                            is_wgs=is_wgs, is_rnaseq=is_rnaseq)
        bcbio_structures.append(bs)

    trg_bs = next((bs for bs in bcbio_structures if bs.bed), None)
    wgs_bs = next((bs for bs in bcbio_structures if not bs.bed), None)
    if not trg_bs and not wgs_bs:
        critical('One of the projects must be targeted, and one must be WGS')
    if not trg_bs:
        critical('One of the projects must be targeted.')
    if not wgs_bs:
        critical('One of the projects must be WGS.')

    if not cnf.project_name:
        cnf.project_name = wgs_bs.project_name.replace('_WGS', '').replace('WGS', '')

    if cnf.output_dir is None:
        cnf.output_dir = join(os.getcwd(), cnf.project_name)
    safe_mkdir(cnf.output_dir)

    cnf.log_dir = join(cnf.output_dir, 'log')
    info('log_dirpath: ' + cnf.log_dir)
    safe_mkdir(cnf.log_dir)
    set_up_log(cnf, 'clinical_target2wgs', cnf.project_name, cnf.output_dir)

    cnf.work_dir = cnf.work_dir or adjust_path(join(cnf.output_dir, 'work'))
    safe_mkdir(cnf.work_dir)

    shared_sample_names = set(s.name for s in wgs_bs.samples) & set(s.name for s in trg_bs.samples)
    if not shared_sample_names:
        critical('No shared samples in target and WGS projects.\n'
                 'Target: ' + ', '.join(s.name for s in trg_bs.samples) +
                 '\nWGS: ' + ', '.join(s.name for s in wgs_bs.samples))
    info('Shared samples: ' + ', '.join(shared_sample_names))

    info('')
    info('*' * 70)
    run_clinical_target2wgs(cnf.genome, wgs_bs, trg_bs, shared_sample_names, cnf.output_dir)
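

# Example invocation (hypothetical paths), per the usage message above: exactly two bcbio
# projects are expected, one targeted (has a BED file) and one WGS (no BED file):
#
#   python <this_script>.py /ngs/bcbio/project_WGS /ngs/bcbio/project_targetseq -o /ngs/clinical_reports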


def main():
    info(' '.join(sys.argv))
    info()
    description = 'This script makes clinical reports based on multiple bcbio projects.'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('--log-dir', dest='log_dir')
    parser.add_option('--email', dest='email',
                      help='E-mail address to send notifications on errors and finished jobs.')
    parser.add_option('--metadata', dest='metadata_csv', help='CSV file with parameters of each sample.')
    parser.add_option('-o', dest='output_dir', help='Output directory for report combining.')

    cnf, bcbio_project_dirpaths, bcbio_cnfs, final_dirpaths, tags, is_wgs_in_bcbio, is_rnaseq = \
        process_post_bcbio_args(parser)
    is_wgs = cnf.is_wgs = cnf.is_wgs or is_wgs_in_bcbio

    if not cnf.metadata_csv:
        critical('Provide the path to CSV file with information of each sample')
        critical('Usage: ' + __file__ +
                 ' project_bcbio_path [project_bcbio_path] --metadata metadata_path [-o output_dir]')

    cnf.sample_names = []
    parameters_info, samples_data = parse_samples_metadata(cnf, cnf.metadata_csv)

    info()
    info('*' * 70)
    bcbio_structures = []
    for bcbio_project_dirpath, bcbio_cnf, final_dirpath in zip(
            bcbio_project_dirpaths, bcbio_cnfs, final_dirpaths):
        bs = BCBioStructure(cnf, bcbio_project_dirpath, bcbio_cnf, final_dirpath,
                            is_wgs=is_wgs, is_rnaseq=is_rnaseq)
        bcbio_structures.append(bs)
        for s in bs.samples:
            assert s.targetcov_json_fpath, str(s.dirpath) + ' ' + str(s.targqc_dirpath)

    if cnf.output_dir is None and cnf.project_name is None:
        critical('Either -o (output dir) or --project (project name) has to be specified')
    if not cnf.output_dir:
        cnf.output_dir = join(os.getcwd(), cnf.project_name)
    if not cnf.project_name:
        cnf.project_name = 'Combined_project'
    cnf.output_dir = safe_mkdir(adjust_path(cnf.output_dir))

    cnf.log_dir = join(cnf.output_dir, 'log')
    info('log_dirpath: ' + cnf.log_dir)
    safe_mkdir(cnf.log_dir)
    set_up_log(cnf, 'combine_clin_reports', cnf.project_name, cnf.output_dir)

    cnf.work_dir = cnf.work_dir or adjust_path(join(cnf.output_dir, 'work'))
    safe_mkdir(cnf.work_dir)

    # shared_sample_names = set(s.name for bs in bcbio_structures for s in bs.samples)
    # if not shared_sample_names:
    #     sample_names = [bs.project_name + ': ' + ', '.join(s.name for s in bs.samples) for bs in bcbio_structures]
    #     critical('Not shared samples in projects.\n' + '\n'.join(sample_names))
    # info('Shared samples: ' + ', '.join(shared_sample_names))

    info('')
    info('*' * 70)
    run_combine_clinical_reports(cnf, bcbio_structures, parameters_info, samples_data)
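

# Example invocation (hypothetical paths), per the usage message above. --metadata is
# required; --project is assumed to be among the shared options added by
# add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug, as referenced in the error
# message above:
#
#   python <this_script>.py /ngs/bcbio/project_1 /ngs/bcbio/project_2 \
#       --metadata /ngs/metadata/samples.csv -o /ngs/combined_reports --project Combined_project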


def main():
    info(' '.join(sys.argv))
    info()
    description = 'This script prepares data for the ExAC browser'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('--log-dir', dest='log_dir', default='-')
    parser.add_option('--bed', dest='bed', help='BED file.')
    parser.add_option('--evaluate-capture-target', dest='do_evaluate_capture', action='store_true',
                      help='Evaluate capture target.')
    parser.add_option('-o', dest='output_dir', help='Output directory with ExAC data.')

    cnf, bcbio_project_dirpaths, bcbio_cnfs, final_dirpaths, tags, is_wgs_in_bcbio, is_rnaseq = \
        process_post_bcbio_args(parser)

    if not cnf.genome:
        critical('Usage: ' + __file__ +
                 ' -g hg19 project_bcbio_path [project_bcbio_path] [--bed bed_fpath] [-o output_dir]'
                 ' [--evaluate-capture-target]')

    cnf.output_dir = get_exac_dir(cnf)
    # if not cnf.output_dir:
    #     critical('Error! Please specify ExAC browser data directory')

    if len(bcbio_project_dirpaths) < 1:
        critical('Usage: ' + __file__ +
                 ' -g hg19 project_bcbio_path [project_bcbio_path] [--bed bed_fpath] [-o output_dir]'
                 ' [--evaluate-capture-target]')

    info()
    info('*' * 70)
    bcbio_structures = []
    project_name = cnf.project_name
    cnf.project_name = None
    for bcbio_project_dirpath, bcbio_cnf, final_dirpath in zip(
            bcbio_project_dirpaths, bcbio_cnfs, final_dirpaths):
        bs = BCBioStructure(cnf, bcbio_project_dirpath, bcbio_cnf, final_dirpath)
        bcbio_structures.append(bs)

    cnf.project_name = project_name
    if not cnf.project_name:
        if len(bcbio_structures) == 1:
            cnf.project_name = bcbio_structures[0].project_name
        else:
            critical('If you combine multiple BCBIO projects you should specify a new project name')
    cnf.caller_name = 'vardict'

    if cnf.output_dir is None:
        critical('Please specify path to ExAC data directory.')
    safe_mkdir(cnf.output_dir)

    cnf.log_dir = join(cnf.output_dir, cnf.project_name + '_log')
    info('log_dirpath: ' + cnf.log_dir)
    safe_mkdir(cnf.log_dir)
    set_up_log(cnf, 'prepare_for_exac', cnf.project_name, cnf.output_dir)

    cnf.work_dir = cnf.work_dir or adjust_path(join(cnf.output_dir, 'work', cnf.project_name))
    safe_mkdir(cnf.work_dir)

    samples = []
    for bs in bcbio_structures:
        for sample in bs.samples:
            sample.name = get_uniq_sample_key(bs.project_name, sample)
            samples.append(sample)

    info()
    info('Preparing variants data')
    variants_dirpath = join(cnf.output_dir, 'vardict')
    safe_mkdir(variants_dirpath)
    combined_vcf_raw_fpath = join(variants_dirpath, cnf.project_name + '.vcf')
    combined_vcf_fpath = combined_vcf_raw_fpath + '.gz'
    if not cnf.reuse_intermediate or not verify_file(combined_vcf_fpath):
        vcf_fpath_by_sname = dict()
        for bs in bcbio_structures:
            pass_mut_fpaths = get_mutations_fpaths(bs)
            vcf_fpaths, pass_vcf_fpaths = convert_vardict_txts_to_bcbio_vcfs(
                cnf.work_dir, cnf.genome.name, pass_mut_fpaths, bs.samples, cnf.caller_name,
                output_dirpath=cnf.work_dir, pass_only=False, bed_fpath=bs.sv_bed,
                min_freq=bs.cnf.variant_filtering['min_freq'],
                act_min_freq=bs.cnf.variant_filtering['act_min_freq'])
            if not vcf_fpaths and not pass_vcf_fpaths:
                continue
            for sample, vcf_fpath, pass_vcf_fpath in zip(bs.samples, vcf_fpaths, pass_vcf_fpaths):
                if vcf_fpath and verify_file(vcf_fpath):
                    vcf_fpath_by_sname[sample.name] = vcf_fpath
                elif pass_vcf_fpath and verify_file(pass_vcf_fpath):
                    vcf_fpath_by_sname[sample.name] = pass_vcf_fpath

        if not vcf_fpath_by_sname:
            info('No VCFs found, skipping preparing variants')
        else:
            info()
            combined_vcf_fpath = merge_vcfs(cnf, vcf_fpath_by_sname, combined_vcf_raw_fpath)
            project_vcf_dirpath = join(variants_dirpath, cnf.project_name)
            safe_mkdir(project_vcf_dirpath)
            for sample_name, vcf_fpath in vcf_fpath_by_sname.items():
                if verify_file(vcf_fpath) and not verify_file(
                        join(project_vcf_dirpath, basename(vcf_fpath)), silent=True):
                    shutil.move(vcf_fpath, project_vcf_dirpath)
                    shutil.move(vcf_fpath + '.tbi', project_vcf_dirpath)

    info()
    info('Saving coverage')
    project_cov_dirpath = join(cnf.output_dir, 'coverage', cnf.project_name)
    safe_mkdir(project_cov_dirpath)
    calculate_coverage_use_grid(cnf, samples, project_cov_dirpath)

    if cnf.do_evaluate_capture:
        evaluate_capture(cnf, bcbio_structures)

    if combined_vcf_fpath:
        info()
        info('Creating BAM files for IGV')
        exac_features_fpath = os.path.join(exac_data_dir, cnf.genome.name, 'all_features.bed.gz')
        split_bam_files_use_grid(cnf, samples, combined_vcf_fpath, exac_features_fpath)
    else:
        warn('Combined VCF file does not exist. BAM files for IGV cannot be created')

    info()
    add_project_to_exac(cnf)
    info('Done.')
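

# Example invocation (hypothetical paths), per the usage messages above: the genome build
# is required, and the ExAC data directory is resolved via get_exac_dir():
#
#   python <this_script>.py -g hg19 /ngs/bcbio/project_1 /ngs/bcbio/project_2 \
#       --bed /ngs/beds/capture.bed --evaluate-capture-target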