Ejemplo n.º 1
0
def samples_to_records(samples, default_keys=None):
    """Convert samples into output CWL records.
    """
    from bcbio.pipeline import run_info
    RECORD_CONVERT_TO_LIST = set([
        "config__algorithm__tools_on", "config__algorithm__tools_off",
        "reference__genome_context"
    ])
    all_keys = _get_all_cwlkeys(samples, default_keys)
    out = []
    for data in samples:
        for raw_key in sorted(list(all_keys)):
            key = raw_key.split("__")
            if tz.get_in(key, data) is None:
                data = tz.update_in(data, key, lambda x: None)
            if raw_key not in data["cwl_keys"]:
                data["cwl_keys"].append(raw_key)
            if raw_key in RECORD_CONVERT_TO_LIST:
                val = tz.get_in(key, data)
                if not val: val = []
                elif not isinstance(val, (list, tuple)): val = [val]
                data = tz.update_in(data, key, lambda x: val)
            # Booleans are problematic for CWL serialization, convert into string representation
            if isinstance(tz.get_in(key, data), bool):
                data = tz.update_in(data, key,
                                    lambda x: str(tz.get_in(key, data)))
        data["metadata"] = run_info.add_metadata_defaults(
            data.get("metadata", {}))
        out.append(data)
    return out
Ejemplo n.º 2
0
def samples_to_records(samples, default_keys=None):
    """Convert samples into output CWL records.
    """
    from bcbio.pipeline import run_info
    RECORD_CONVERT_TO_LIST = set(["config__algorithm__tools_on", "config__algorithm__tools_off",
                                  "reference__genome_context"])
    all_keys = _get_all_cwlkeys(samples, default_keys)
    out = []
    for data in samples:
        for raw_key in sorted(list(all_keys)):
            key = raw_key.split("__")
            if tz.get_in(key, data) is None:
                data = tz.update_in(data, key, lambda x: None)
            if raw_key not in data["cwl_keys"]:
                data["cwl_keys"].append(raw_key)
            if raw_key in RECORD_CONVERT_TO_LIST:
                val = tz.get_in(key, data)
                if not val: val = []
                elif not isinstance(val, (list, tuple)): val = [val]
                data = tz.update_in(data, key, lambda x: val)
            # Booleans are problematic for CWL serialization, convert into string representation
            if isinstance(tz.get_in(key, data), bool):
                data = tz.update_in(data, key, lambda x: str(tz.get_in(key, data)))
        data["metadata"] = run_info.add_metadata_defaults(data.get("metadata", {}))
        out.append(data)
    return out
Ejemplo n.º 3
0
def samples_to_records(samples):
    """Convert samples into output CWL records.
    """
    from bcbio.pipeline import run_info
    RECORD_CONVERT_TO_LIST = set([
        "config__algorithm__tools_on", "config__algorithm__tools_off",
        "config__algorithm__svcaller"
    ])
    all_keys = _get_all_cwlkeys(samples)
    out = []
    for data in samples:
        for raw_key in sorted(list(all_keys)):
            key = raw_key.split("__")
            if tz.get_in(key, data) is None:
                data = tz.update_in(data, key, lambda x: None)
                data["cwl_keys"].append(raw_key)
            if raw_key in RECORD_CONVERT_TO_LIST:
                val = tz.get_in(key, data)
                if not val: val = []
                elif not isinstance(val, (list, tuple)): val = [val]
                data = tz.update_in(data, key, lambda x: val)
        data["metadata"] = run_info.add_metadata_defaults(
            data.get("metadata", {}))
        out.append(data)
    return out
Ejemplo n.º 4
0
def _samples_to_records(samples):
    """Convert samples into output CWL records.
    """
    from bcbio.pipeline import run_info
    RECORD_CONVERT_TO_LIST = set(["config__algorithm__tools_on", "config__algorithm__tools_off"])
    all_keys = _get_all_cwlkeys(samples)
    out = []
    for data in samples:
        for raw_key in sorted(list(all_keys)):
            key = raw_key.split("__")
            if tz.get_in(key, data) is None:
                data = tz.update_in(data, key, lambda x: None)
                data["cwl_keys"].append(raw_key)
            if raw_key in RECORD_CONVERT_TO_LIST:
                val = tz.get_in(key, data)
                if not val: val = []
                elif not isinstance(val, (list, tuple)): val = [val]
                data = tz.update_in(data, key, lambda x: val)
        data["metadata"] = run_info.add_metadata_defaults(data.get("metadata", {}))
        out.append(data)
    return out
Ejemplo n.º 5
0
def batch_for_variantcall(samples):
    """Prepare a set of samples for parallel variant calling.

    CWL input target that groups samples into batches and variant callers
    for parallel processing.
    """
    from bcbio.pipeline import run_info
    convert_to_list = set(["config__algorithm__tools_on", "config__algorithm__tools_off"])
    default_keys = set(["metadata__batch", "config__algorithm__validate",
                        "config__algorithm__validate_regions"])
    to_process, extras = _dup_samples_by_variantcaller(samples, require_bam=False)
    batch_groups = collections.defaultdict(list)
    to_process = [utils.to_single_data(x) for x in to_process]
    all_keys = set([])
    for data in to_process:
        all_keys.update(set(data["cwl_keys"]))
    all_keys.update(default_keys)
    for data in to_process:
        for raw_key in sorted(list(all_keys)):
            key = raw_key.split("__")
            if tz.get_in(key, data) is None:
                data = tz.update_in(data, key, lambda x: None)
                data["cwl_keys"].append(raw_key)
            if raw_key in convert_to_list:
                val = tz.get_in(key, data)
                if not val: val = []
                elif not isinstance(val, (list, tuple)): val = [val]
                data = tz.update_in(data, key, lambda x: val)
        vc = get_variantcaller(data, require_bam=False)
        data["metadata"] = run_info.add_metadata_defaults(data.get("metadata", {}))
        batches = dd.get_batches(data) or dd.get_sample_name(data)
        if not isinstance(batches, (list, tuple)):
            batches = [batches]
        for b in batches:
            batch_groups[(b, vc)].append(utils.deepish_copy(data))
    return list(batch_groups.values()) + extras