def setup_merged_samples(flist, sample_group_fn=_group_samples, **kw):
    """Setup analysis that merges multiple sample runs.

    Only samples with more than one sample run are processed. For each of
    them a post_process file and a merged bcbb-config file are set up in a
    MERGED_SAMPLE_OUTPUT_DIR directory located two levels above the first
    sample run's config file.

    :param flist: list of file names, by default *-bcbb-config.yaml files
    :param sample_group_fn: function that groups files into samples and
        sample runs. The function takes flist as input. Its result is
        iterated as ``{sample: {run: config_file}}`` (presumably; verify
        against the grouping function in use).
    :param kw: keyword options read by this function:
        ``post_process`` - post_process file to use; when missing or falsy
        the name is derived from the first sample run's bcbb-config file;
        ``dry_run`` - forwarded to the dry_* helpers and to
        merge_sample_config (default True);
        ``new_config`` - rewrite the merged bcbb-config file even if it
        already exists (default False)

    :returns: updated flist with config files for merged samples
    """
    # Loop-invariant option, looked up once instead of at every call site.
    dry_run = kw.get('dry_run', True)
    new_flist = []
    sample_d = sample_group_fn(flist)
    # items()/list(values()) behave the same on Python 2 and Python 3,
    # unlike iteritems() and indexing keys()/values() directly.
    for k, v in sample_d.items():
        if len(v) <= 1:
            continue
        run_files = list(v.values())
        f = run_files[0]
        out_d = os.path.join(os.path.dirname(os.path.dirname(f)), MERGED_SAMPLE_OUTPUT_DIR)
        LOG.info("Sample {} has {} sample runs; setting up merge analysis in {}".format(k, len(v), out_d))
        # dry_run is hard-coded to False here, independent of the caller's
        # dry_run option.
        dry_makedir(out_d, dry_run=False)

        # Setup post_process file for the merged analysis
        pp = kw.get("post_process") or f.replace("-bcbb-config.yaml", "-post_process.yaml")
        with open(pp) as fh:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects; consider yaml.safe_load if
            # post_process files only hold plain data - confirm first.
            conf = yaml.load(fh)
        conf = update_pp_platform_args(
            conf,
            jobname="{}_total".format(k),
            workdir=out_d,
            output="{}_total-bcbb.log".format(k),
        )
        pp_new = os.path.join(out_d, os.path.basename(pp))
        dry_unlink(pp_new, dry_run=dry_run)
        dry_write(pp_new,
                  yaml.safe_dump(conf, default_flow_style=False, allow_unicode=True, width=1000),
                  dry_run=dry_run)

        # Setup merged bcbb-config file
        bcbb_config = merge_sample_config(run_files, sample=k, out_d=out_d, dry_run=dry_run)
        bcbb_config_file = os.path.join(out_d, os.path.basename(f))
        bcbb_config = sort_sample_config_fastq(bcbb_config)
        # An existing merged config is only replaced when new_config is set.
        if not os.path.exists(bcbb_config_file) or kw.get('new_config', False):
            dry_unlink(bcbb_config_file, dry_run=dry_run)
            dry_write(bcbb_config_file,
                      yaml.safe_dump(bcbb_config, default_flow_style=False, allow_unicode=True, width=1000),
                      dry_run=dry_run)
        # The merged config path is reported whether or not it was rewritten.
        new_flist.append(bcbb_config_file)
    return new_flist
def setup_merged_samples(flist, sample_group_fn=_group_samples, **kw):
    """Setup analysis that merges multiple sample runs.

    Every sample with at least one sample run is processed. For each of
    them a post_process file and a merged bcbb-config file are set up in a
    MERGED_SAMPLE_OUTPUT_DIR directory located two levels above the first
    sample run's config file.

    :param flist: list of file names, by default *-bcbb-config.yaml files
    :param sample_group_fn: function that groups files into samples and
        sample runs. The function takes flist as input. Its result is
        iterated as ``{sample: {run: config_file}}`` (presumably; verify
        against the grouping function in use).
    :param kw: keyword options read by this function:
        ``post_process`` - post_process file to use; when missing or falsy
        (e.g. passed explicitly as None) the name is derived from the
        first sample run's bcbb-config file;
        ``dry_run`` - forwarded to the dry_* helpers and to
        merge_sample_config (default True);
        ``new_config`` - rewrite the merged bcbb-config file even if it
        already exists (default False)

    :returns: updated flist with config files for merged samples
    """
    # Loop-invariant option, looked up once instead of at every call site.
    dry_run = kw.get('dry_run', True)
    new_flist = []
    sample_d = sample_group_fn(flist)
    # items()/list(values()) behave the same on Python 2 and Python 3,
    # unlike iteritems() and indexing keys()/values() directly.
    for k, v in sample_d.items():
        if not v:
            # No sample runs: nothing to take a config name from.
            continue
        run_files = list(v.values())
        f = run_files[0]
        out_d = os.path.join(os.path.dirname(os.path.dirname(f)), MERGED_SAMPLE_OUTPUT_DIR)
        LOG.info("Sample {} has {} sample runs; setting up merge analysis in {}".format(k, len(v), out_d))
        # dry_run is hard-coded to False here, independent of the caller's
        # dry_run option.
        dry_makedir(out_d, dry_run=False)

        # Setup post_process file for the merged analysis.
        # `or` rather than a dict.get default: a post_process key that is
        # present but None would otherwise reach open(None) and fail.
        pp = kw.get("post_process") or f.replace("-bcbb-config.yaml", "-post_process.yaml")
        with open(pp) as fh:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects; consider yaml.safe_load if
            # post_process files only hold plain data - confirm first.
            conf = yaml.load(fh)
        conf = update_pp_platform_args(
            conf,
            jobname="{}_total".format(k),
            workdir=out_d,
            output="{}_total-bcbb.log".format(k),
        )
        pp_new = os.path.join(out_d, os.path.basename(pp))
        dry_unlink(pp_new, dry_run=dry_run)
        dry_write(pp_new,
                  yaml.safe_dump(conf, default_flow_style=False, allow_unicode=True, width=1000),
                  dry_run=dry_run)

        # Setup merged bcbb-config file
        bcbb_config = merge_sample_config(run_files, sample=k, out_d=out_d, dry_run=dry_run)
        bcbb_config_file = os.path.join(out_d, os.path.basename(f))
        bcbb_config = sort_sample_config_fastq(bcbb_config, path=out_d)
        # An existing merged config is only replaced when new_config is set.
        if not os.path.exists(bcbb_config_file) or kw.get('new_config', False):
            dry_unlink(bcbb_config_file, dry_run=dry_run)
            dry_write(bcbb_config_file,
                      yaml.safe_dump(bcbb_config, default_flow_style=False, allow_unicode=True, width=1000),
                      dry_run=dry_run)
        # The merged config path is reported whether or not it was rewritten.
        new_flist.append(bcbb_config_file)
    return new_flist
def test_merge_sample_config(self):
    """Test merging sample configuration files.

    Runs merge_sample_config (dry_run=False) on the sample runs grouped
    under P001_101_index3 and checks that both expected fastq files exist
    in the sample's TOTAL directory afterwards.
    """
    sample = "P001_101_index3"
    grouped = _group_samples(find_samples(j_doe_00_05))

    out_d = os.path.join(j_doe_00_05, sample, "TOTAL")
    if not os.path.exists(out_d):
        os.makedirs(out_d)

    # The returned configuration is not inspected; only the files found
    # on disk after the call are checked.
    merge_sample_config(grouped[sample].values(), sample, out_d=out_d, dry_run=False)

    expected_fastq = (
        "P001_101_index3_B002BBBXX_TGACCA_L001_R1_001.fastq.gz",
        "P001_101_index3_C003CCCXX_TGACCA_L001_R1_001.fastq.gz",
    )
    for fastq in expected_fastq:
        target = os.path.join(j_doe_00_05, sample, "TOTAL", fastq)
        self.assertTrue(os.path.exists(target))
def test_merge_sample_config(self):
    """Test merging sample configuration files.

    After merge_sample_config has run for sample P001_101_index3 with
    dry_run=False, one fastq file per flowcell (B002BBBXX and C003CCCXX)
    must be present in the sample's TOTAL directory.
    """
    sample_name = "P001_101_index3"
    samples_by_name = _group_samples(find_samples(j_doe_00_05))

    total_dir = os.path.join(j_doe_00_05, sample_name, "TOTAL")
    if not os.path.exists(total_dir):
        os.makedirs(total_dir)

    # Called for its effect on total_dir; the result itself is unused.
    merge_sample_config(
        samples_by_name[sample_name].values(),
        sample_name,
        out_d=total_dir,
        dry_run=False,
    )

    first_fastq = os.path.join(
        total_dir, "P001_101_index3_B002BBBXX_TGACCA_L001_R1_001.fastq.gz")
    second_fastq = os.path.join(
        total_dir, "P001_101_index3_C003CCCXX_TGACCA_L001_R1_001.fastq.gz")

    self.assertTrue(os.path.exists(first_fastq))
    self.assertTrue(os.path.exists(second_fastq))