Example 1
0
def _batch_gvcfs(data, region, vrn_files, ref_file, out_file=None):
    """Perform batching of gVCF files if above recommended input count.

    Recursively combines ``vrn_files`` into intermediate batch gVCFs until
    the file count is at or below the configured joint group size, so no
    single combine step receives more inputs than recommended.

    data -- sample data dictionary; supplies the joint group size setting.
    region -- genomic region being processed (passed through to combining).
    vrn_files -- list of input gVCF file paths; must be non-empty.
    ref_file -- reference genome file used by the combine step.
    out_file -- basename template for batch outputs; defaults to the first
                input file (batch outputs get a ``-b<i>`` suffix).
    Returns the (possibly reduced) list of gVCF file paths.
    """
    if out_file is None:
        out_file = vrn_files[0]
    max_batch = int(dd.get_joint_group_size(data))
    if len(vrn_files) > max_batch:
        out = []
        # Group to get below the maximum batch size, using 200 as the baseline.
        # Fix: the original used max(max_batch, 200), which hands each combine
        # job 200 inputs even when the recommended maximum is smaller,
        # defeating the purpose of batching. min() caps every batch at both
        # the recommended size and the 200-file baseline.
        for i, batch_vrn_files in enumerate(tz.partition_all(min(max_batch, 200), vrn_files)):
            base, ext = utils.splitext_plus(out_file)
            batch_out_file = "%s-b%s%s" % (base, i, ext)
            out.append(_run_combine_gvcfs(batch_vrn_files, region, ref_file, batch_out_file, data))
        # Recurse: the combined outputs may still exceed max_batch.
        return _batch_gvcfs(data, region, out, ref_file)
    else:
        return vrn_files
Example 2
0
def _batch_gvcfs(data, region, vrn_files, ref_file, out_file=None):
    """Perform batching of gVCF files if above recommended input count.

    Recursively combines ``vrn_files`` into intermediate batch gVCFs until
    the file count is at or below the configured joint group size, so no
    single combine step receives more inputs than recommended.

    data -- sample data dictionary; supplies the joint group size setting.
    region -- genomic region being processed (passed through to combining).
    vrn_files -- list of input gVCF file paths; must be non-empty.
    ref_file -- reference genome file used by the combine step.
    out_file -- basename template for batch outputs; defaults to the first
                input file (batch outputs get a ``-b<i>`` suffix).
    Returns the (possibly reduced) list of gVCF file paths.
    """
    if out_file is None:
        out_file = vrn_files[0]
    # group to get below the maximum batch size, using 200 as the baseline
    max_batch = int(dd.get_joint_group_size(data))
    if len(vrn_files) > max_batch:
        out = []
        num_batches = int(math.ceil(float(len(vrn_files)) / max_batch))
        # Fix: toolz.partition_all takes the *chunk size*, not the number of
        # chunks. Passing num_batches gave each combine job only num_batches
        # inputs for small cohorts, and more than max_batch inputs whenever
        # len(vrn_files) > max_batch ** 2. Partition by the per-batch size so
        # we get num_batches groups of at most max_batch files each.
        batch_size = int(math.ceil(float(len(vrn_files)) / num_batches))
        for i, batch_vrn_files in enumerate(tz.partition_all(batch_size, vrn_files)):
            base, ext = utils.splitext_plus(out_file)
            batch_out_file = "%s-b%s%s" % (base, i, ext)
            out.append(run_combine_gvcfs(batch_vrn_files, region, ref_file, batch_out_file, data))
        # Recurse: the combined outputs may still exceed max_batch.
        return _batch_gvcfs(data, region, out, ref_file)
    else:
        return vrn_files