def _batch_gvcfs(data, region, vrn_files, ref_file, out_file=None):
    """Combine gVCF inputs in batches when above the recommended input count.

    Recursively merges ``vrn_files`` with CombineGVCFs until the number of
    files is at or below the configured joint group size, then returns the
    (possibly combined) list of gVCF files.

    Args:
        data: sample/configuration dictionary; queried via
            ``dd.get_joint_group_size`` for the maximum batch size.
        region: genomic region, passed through to the combining step.
        vrn_files: input gVCF files to batch; must be non-empty.
        ref_file: reference genome file, passed through to the combining step.
        out_file: optional base name used to derive per-batch output names;
            defaults to the first input file.

    Returns:
        List of gVCF files, combined so the count is <= the joint group size.
    """
    if out_file is None:
        out_file = vrn_files[0]
    # Clamp to >= 2 so every round strictly reduces the file count and the
    # recursion terminates even with a pathological configured group size.
    max_batch = max(int(dd.get_joint_group_size(data)), 2)
    if len(vrn_files) > max_batch:
        out = []
        # Partition into chunks of at most max_batch files. The previous
        # max(max_batch, 200) chunk size could feed more than max_batch
        # inputs to a single CombineGVCFs run whenever the configured group
        # size was below 200, defeating the purpose of batching.
        for i, batch_vrn_files in enumerate(tz.partition_all(max_batch, vrn_files)):
            base, ext = utils.splitext_plus(out_file)
            batch_out_file = "%s-b%s%s" % (base, i, ext)
            out.append(_run_combine_gvcfs(batch_vrn_files, region, ref_file, batch_out_file, data))
        # Recurse on the combined outputs until we are under the limit.
        return _batch_gvcfs(data, region, out, ref_file)
    else:
        return vrn_files
def _batch_gvcfs(data, region, vrn_files, ref_file, out_file=None):
    """Combine gVCF inputs in batches when above the recommended input count.

    Recursively merges ``vrn_files`` with CombineGVCFs until the number of
    files is at or below the configured joint group size, then returns the
    (possibly combined) list of gVCF files.

    Args:
        data: sample/configuration dictionary; queried via
            ``dd.get_joint_group_size`` for the maximum batch size.
        region: genomic region, passed through to the combining step.
        vrn_files: input gVCF files to batch; must be non-empty.
        ref_file: reference genome file, passed through to the combining step.
        out_file: optional base name used to derive per-batch output names;
            defaults to the first input file.

    Returns:
        List of gVCF files, combined so the count is <= the joint group size.
    """
    if out_file is None:
        out_file = vrn_files[0]
    # Clamp to >= 2 so every round strictly reduces the file count and the
    # recursion terminates even with a pathological configured group size.
    max_batch = max(int(dd.get_joint_group_size(data)), 2)
    if len(vrn_files) > max_batch:
        out = []
        # tz.partition_all takes the chunk *size*, not the number of chunks:
        # the previous call passed num_batches = ceil(len / max_batch) as the
        # size, producing many tiny batches (one combine call per handful of
        # files) and, for cohorts larger than max_batch**2, individual batches
        # above max_batch. Partitioning by max_batch yields exactly
        # ceil(len / max_batch) batches, each within the limit.
        for i, batch_vrn_files in enumerate(tz.partition_all(max_batch, vrn_files)):
            base, ext = utils.splitext_plus(out_file)
            batch_out_file = "%s-b%s%s" % (base, i, ext)
            out.append(run_combine_gvcfs(batch_vrn_files, region, ref_file, batch_out_file, data))
        # Recurse on the combined outputs until we are under the limit.
        return _batch_gvcfs(data, region, out, ref_file)
    else:
        return vrn_files