def setup_merged_samples(flist, sample_group_fn=_group_samples, **kw):
    """Setup analysis that merges multiple sample runs.

    Only samples with more than one sample run are merged; for each such
    sample a merge output directory is created and both a post_process
    config and a merged bcbb-config file are written there.

    :param flist: list of file names, by default *-bcbb-config.yaml files
    :param sample_group_fn: function that groups files into samples and
      sample runs. The function takes flist as input.
    :keyword post_process: optional post_process file to use instead of the
      one derived from the sample-run config name
    :keyword dry_run: dry run flag (defaults to True)
    :keyword new_config: force rewriting an existing merged config

    :returns: updated flist with config files for merged samples
    """
    new_flist = []
    sample_d = sample_group_fn(flist)
    # .items() instead of .iteritems(); list(...) around keys()/values():
    # works on both Python 2 and Python 3 (iteritems/index-on-view are py2-only)
    for k, v in sample_d.items():
        if len(v) > 1:
            # Pick an arbitrary sample-run config file to derive paths from
            f = v[list(v.keys())[0]]
            out_d = os.path.join(os.path.dirname(os.path.dirname(f)), MERGED_SAMPLE_OUTPUT_DIR)
            LOG.info("Sample {} has {} sample runs; setting up merge analysis in {}".format(k, len(v), out_d))
            dry_makedir(out_d, dry_run=False)
            # Use supplied post_process if truthy, else derive it from the config name
            # (single .get() call replaces the redundant double lookup)
            pp = kw.get("post_process") or f.replace("-bcbb-config.yaml", "-post_process.yaml")
            with open(pp) as fh:
                # safe_load: config data only; plain yaml.load without a Loader
                # is deprecated and can construct arbitrary objects
                conf = yaml.safe_load(fh)
            conf = update_pp_platform_args(conf, **{'jobname': "{}_total".format(k),
                                                   'workdir': out_d,
                                                   'output': "{}_total-bcbb.log".format(k)})
            pp_new = os.path.join(out_d, os.path.basename(pp))
            dry_unlink(pp_new, dry_run=kw.get('dry_run', True))
            dry_write(pp_new, yaml.safe_dump(conf, default_flow_style=False, allow_unicode=True, width=1000),
                      dry_run=kw.get('dry_run', True))
            # Setup merged bcbb-config file
            bcbb_config = merge_sample_config(list(v.values()), sample=k, out_d=out_d,
                                              dry_run=kw.get('dry_run', True))
            bcbb_config_file = os.path.join(out_d, os.path.basename(list(v.values())[0]))
            bcbb_config = sort_sample_config_fastq(bcbb_config)
            # Only (re)write the merged config if missing or explicitly requested
            if not os.path.exists(bcbb_config_file) or kw.get('new_config', False):
                dry_unlink(bcbb_config_file, dry_run=kw.get('dry_run', True))
                dry_write(bcbb_config_file, yaml.safe_dump(bcbb_config, default_flow_style=False,
                                                           allow_unicode=True, width=1000),
                          dry_run=kw.get('dry_run', True))
            new_flist.append(bcbb_config_file)
    return new_flist
def setup_merged_samples(flist, sample_group_fn=_group_samples, **kw):
    """Setup analysis that merges multiple sample runs.

    Every non-empty sample group gets a merge output directory with a
    post_process config and a merged bcbb-config file written into it.

    :param flist: list of file names, by default *-bcbb-config.yaml files
    :param sample_group_fn: function that groups files into samples and
      sample runs. The function takes flist as input.
    :keyword post_process: optional post_process file to use instead of the
      one derived from the sample-run config name
    :keyword dry_run: dry run flag (defaults to True)
    :keyword new_config: force rewriting an existing merged config

    :returns: updated flist with config files for merged samples
    """
    new_flist = []
    sample_d = sample_group_fn(flist)
    # .items() instead of .iteritems(); list(...) around keys()/values():
    # works on both Python 2 and Python 3 (iteritems/index-on-view are py2-only)
    for k, v in sample_d.items():
        if not v:
            continue
        # Pick an arbitrary sample-run config file to derive paths from
        f = v[list(v.keys())[0]]
        out_d = os.path.join(os.path.dirname(os.path.dirname(f)), MERGED_SAMPLE_OUTPUT_DIR)
        LOG.info("Sample {} has {} sample runs; setting up merge analysis in {}".format(k, len(v), out_d))
        dry_makedir(out_d, dry_run=False)
        # BUGFIX: kw.get("post_process", default) returned None when callers
        # explicitly passed post_process=None, crashing open() below; 'or'
        # falls back to the derived name for any falsy value
        pp = kw.get("post_process") or f.replace("-bcbb-config.yaml", "-post_process.yaml")
        with open(pp) as fh:
            # safe_load: config data only; plain yaml.load without a Loader
            # is deprecated and can construct arbitrary objects
            conf = yaml.safe_load(fh)
        conf = update_pp_platform_args(conf, **{'jobname': "{}_total".format(k),
                                               'workdir': out_d,
                                               'output': "{}_total-bcbb.log".format(k)})
        pp_new = os.path.join(out_d, os.path.basename(pp))
        dry_unlink(pp_new, dry_run=kw.get('dry_run', True))
        dry_write(pp_new, yaml.safe_dump(conf, default_flow_style=False, allow_unicode=True, width=1000),
                  dry_run=kw.get('dry_run', True))
        # Setup merged bcbb-config file
        bcbb_config = merge_sample_config(list(v.values()), sample=k, out_d=out_d,
                                          dry_run=kw.get('dry_run', True))
        bcbb_config_file = os.path.join(out_d, os.path.basename(list(v.values())[0]))
        bcbb_config = sort_sample_config_fastq(bcbb_config, path=out_d)
        # Only (re)write the merged config if missing or explicitly requested
        if not os.path.exists(bcbb_config_file) or kw.get('new_config', False):
            dry_unlink(bcbb_config_file, dry_run=kw.get('dry_run', True))
            dry_write(bcbb_config_file, yaml.safe_dump(bcbb_config, default_flow_style=False,
                                                       allow_unicode=True, width=1000),
                      dry_run=kw.get('dry_run', True))
        new_flist.append(bcbb_config_file)
    return new_flist
def setup_sample(f, analysis, amplicon=False, genome_build="hg19", **kw):
    """Setup config files, making backups and writing new files.

    :param f: bcbb-config.yaml file for the sample
    :param analysis: analysis name to set in the sample config (skipped if falsy)
    :param amplicon: amplicon analysis flag; disables duplicate marking
    :param genome_build: genome build to set in the sample config
    :keyword dry_run: dry run flag (required)
    :keyword post_process: if set, skip post_process file rewriting entirely
    :keyword baits, targets, galaxy_config, distributed, num_cores, snpEff:
      optional post_process overrides
    """
    if not os.path.exists(f):
        return
    with open(f) as fh:
        # safe_load: config data only; plain yaml.load without a Loader
        # is deprecated and can construct arbitrary objects
        config = yaml.safe_load(fh)
    ## Check for correctly formatted config
    if not config.get("details", None):
        LOG.warn("Couldn't find 'details' section in config file {}: aborting setup!".format(f))
        return
    ## Save file to backup if backup doesn't exist
    f_bak = f.replace("-bcbb-config.yaml", "-bcbb-config.yaml.bak")
    if not os.path.exists(f_bak):
        LOG.info("Making backup of {} in {}".format(f, f_bak))
        dry_backup(os.path.abspath(f), dry_run=kw['dry_run'])
    ## Save command file to backup if it doesn't exist
    cmdf = f.replace("-bcbb-config.yaml", "-bcbb-command.txt")
    if os.path.exists(cmdf):
        cmdf_bak = cmdf.replace("-bcbb-command.txt", "-bcbb-command.txt.bak")
        if not os.path.exists(cmdf_bak):
            LOG.info("Making backup of {} in {}".format(cmdf, cmdf_bak))
            dry_backup(os.path.abspath(cmdf), dry_run=kw['dry_run'])
    ## Save post_process file to backup if it doesn't exist
    ppf = f.replace("-bcbb-config.yaml", "-post_process.yaml")
    if os.path.exists(ppf):
        ppf_bak = ppf.replace("-post_process.yaml", "-post_process.yaml.bak")
        if not os.path.exists(ppf_bak):
            LOG.info("Making backup of {} in {}".format(ppf, ppf_bak))
            dry_backup(ppf, dry_run=kw['dry_run'])
    if analysis:
        config = update_sample_config(config, "analysis", analysis)
    if genome_build:
        config = update_sample_config(config, "genome_build", genome_build)
    config = sort_sample_config_fastq(config)
    ## Remove config file and rewrite
    dry_unlink(f, kw['dry_run'])
    dry_write(f, yaml.safe_dump(config, default_flow_style=False, allow_unicode=True, width=1000),
              dry_run=kw['dry_run'])
    ## Setup post process only if not provided at command line
    if not kw.get("post_process", None):
        ppfile = f.replace("-bcbb-config.yaml", "-post_process.yaml")
        with open(ppfile) as fh:
            pp = yaml.safe_load(fh)
        ## Need to set working directory to path of bcbb-config.yaml file
        if pp.get('distributed', {}).get('platform_args', None):
            platform_args = pp['distributed']['platform_args'].split()
            if "-D" in platform_args:
                platform_args[platform_args.index("-D") + 1] = os.path.dirname(f)
            elif "--workdir" in platform_args:
                platform_args[platform_args.index("--workdir") + 1] = os.path.dirname(f)
            pp['distributed']['platform_args'] = " ".join(platform_args)
        ## Change keys for all analyses
        for anl in pp.get('custom_algorithms', {}).keys():
            if kw.get('baits', None):
                pp['custom_algorithms'][anl]['hybrid_bait'] = kw['baits']
            if kw.get('targets', None):
                pp['custom_algorithms'][anl]['hybrid_target'] = kw['targets']
            if amplicon:
                pp['custom_algorithms'][anl]['mark_duplicates'] = False
        if amplicon:
            LOG.info("setting amplicon analysis")
            pp['algorithm']['mark_duplicates'] = False
        if kw.get('galaxy_config', None):
            pp['galaxy_config'] = kw['galaxy_config']
        if kw.get('distributed', None):
            LOG.info("setting distributed execution")
            pp['algorithm']['num_cores'] = 'messaging'
        # BUGFIX: the bare else branch read kw['num_cores'] unconditionally,
        # raising KeyError when num_cores was not supplied; guard with .get()
        elif kw.get('num_cores', None):
            LOG.info("setting parallel execution")
            pp['algorithm']['num_cores'] = kw['num_cores']
        if kw.get('snpEff', None):
            LOG.info("setting snpEff to {}".format(kw["snpEff"]))
            pp['program']['snpEff'] = kw['snpEff']
        dry_unlink(ppfile, dry_run=kw['dry_run'])
        dry_write(ppfile, yaml.safe_dump(pp, default_flow_style=False, allow_unicode=True, width=1000),
                  dry_run=kw['dry_run'])
def setup_sample(f, analysis, amplicon=False, genome_build="hg19", **kw):
    """Setup config files, making backups and writing new files.

    :param f: bcbb-config.yaml file for the sample
    :param analysis: analysis name to set in the sample config (skipped if falsy)
    :param amplicon: amplicon analysis flag; disables duplicate marking
    :param genome_build: genome build to set in the sample config
    :keyword dry_run: dry run flag (required)
    :keyword post_process: if set, skip post_process file rewriting entirely
    :keyword baits, targets, galaxy_config, distributed, num_cores, snpEff:
      optional post_process overrides
    """
    if not os.path.exists(f):
        return
    with open(f) as fh:
        # safe_load: config data only; plain yaml.load without a Loader
        # is deprecated and can construct arbitrary objects
        config = yaml.safe_load(fh)
    ## Check for correctly formatted config
    if not config.get("details", None):
        LOG.warn("Couldn't find 'details' section in config file {}: aborting setup!".format(f))
        return
    ## Save file to backup if backup doesn't exist
    f_bak = f.replace("-bcbb-config.yaml", "-bcbb-config.yaml.bak")
    if not os.path.exists(f_bak):
        LOG.info("Making backup of {} in {}".format(f, f_bak))
        dry_backup(os.path.abspath(f), dry_run=kw['dry_run'])
    ## Save command file to backup if it doesn't exist
    cmdf = f.replace("-bcbb-config.yaml", "-bcbb-command.txt")
    if os.path.exists(cmdf):
        cmdf_bak = cmdf.replace("-bcbb-command.txt", "-bcbb-command.txt.bak")
        if not os.path.exists(cmdf_bak):
            LOG.info("Making backup of {} in {}".format(cmdf, cmdf_bak))
            dry_backup(os.path.abspath(cmdf), dry_run=kw['dry_run'])
    ## Save post_process file to backup if it doesn't exist
    ppf = f.replace("-bcbb-config.yaml", "-post_process.yaml")
    if os.path.exists(ppf):
        ppf_bak = ppf.replace("-post_process.yaml", "-post_process.yaml.bak")
        if not os.path.exists(ppf_bak):
            LOG.info("Making backup of {} in {}".format(ppf, ppf_bak))
            dry_backup(ppf, dry_run=kw['dry_run'])
    if analysis:
        config = update_sample_config(config, "analysis", analysis)
    if genome_build:
        config = update_sample_config(config, "genome_build", genome_build)
    config = sort_sample_config_fastq(config)
    ## Remove config file and rewrite
    dry_unlink(f, kw['dry_run'])
    dry_write(f, yaml.safe_dump(config, default_flow_style=False, allow_unicode=True, width=1000),
              dry_run=kw['dry_run'])
    ## Setup post process only if not provided at command line
    if not kw.get("post_process", None):
        ppfile = f.replace("-bcbb-config.yaml", "-post_process.yaml")
        with open(ppfile) as fh:
            pp = yaml.safe_load(fh)
        ## Need to set working directory to path of bcbb-config.yaml file
        if pp.get('distributed', {}).get('platform_args', None):
            platform_args = pp['distributed']['platform_args'].split()
            if "-D" in platform_args:
                platform_args[platform_args.index("-D") + 1] = os.path.dirname(f)
            elif "--workdir" in platform_args:
                platform_args[platform_args.index("--workdir") + 1] = os.path.dirname(f)
            pp['distributed']['platform_args'] = " ".join(platform_args)
        ## Change keys for all analyses
        for anl in pp.get('custom_algorithms', {}).keys():
            if kw.get('baits', None):
                pp['custom_algorithms'][anl]['hybrid_bait'] = kw['baits']
            if kw.get('targets', None):
                pp['custom_algorithms'][anl]['hybrid_target'] = kw['targets']
            if amplicon:
                pp['custom_algorithms'][anl]['mark_duplicates'] = False
        if amplicon:
            LOG.info("setting amplicon analysis")
            pp['algorithm']['mark_duplicates'] = False
        if kw.get('galaxy_config', None):
            pp['galaxy_config'] = kw['galaxy_config']
        if kw.get('distributed', None):
            LOG.info("setting distributed execution")
            pp['algorithm']['num_cores'] = 'messaging'
        elif kw.get('num_cores', None):
            LOG.info("setting parallel execution")
            pp['algorithm']['num_cores'] = kw['num_cores']
        if kw.get('snpEff', None):
            LOG.info("setting snpEff to {}".format(kw["snpEff"]))
            pp['program']['snpEff'] = kw['snpEff']
        dry_unlink(ppfile, dry_run=kw['dry_run'])
        dry_write(ppfile, yaml.safe_dump(pp, default_flow_style=False, allow_unicode=True, width=1000),
                  dry_run=kw['dry_run'])