Example #1
0
def summarize_grading(samples, vkey="validate"):
    """Provide summaries of grading results across all samples.

    Handles both traditional pipelines (validation part of variants) and CWL
    pipelines (validation at top level).

    For every group returned by ``_group_validate_samples`` a
    ``grading-summary-<group>.csv`` file is written below ``<work>/<vkey>``.
    The CSV path is stored on each validation dictionary as
    ``grading_summary`` and the generated plots as ``grading_plots``.

    Args:
        samples: Nested sample inputs; flattened with ``utils.flatten``.
        vkey: Key under which validation information is stored, both on each
            variant and at the top level of a sample.

    Returns:
        A list of single-sample lists. If no grading information is present
        this is every flattened sample wrapped in its own list; otherwise it
        is the list handed back by ``_group_validate_samples`` extended with
        ``[data]`` for each grouped sample.
    """
    samples = list(utils.flatten(samples))
    if not _has_grading_info(samples, vkey):
        return [[d] for d in samples]
    validate_dir = utils.safe_makedir(
        os.path.join(samples[0]["dirs"]["work"], vkey))
    header = ["sample", "caller", "variant.type", "category", "value"]
    validated, out = _group_validate_samples(samples, vkey)
    for vname, vitems in validated.items():
        out_csv = os.path.join(validate_dir, "grading-summary-%s.csv" % vname)
        # Fall back to "" so a missing lane/sample name (None) cannot make the
        # sort fail when compared against string names.
        ordered = sorted(
            vitems,
            key=lambda x: x.get("lane", dd.get_sample_name(x)) or "")
        plot_data = []
        plot_files = []
        with open(out_csv, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(header)
            for data in ordered:
                # Entries in "variants" that are not dictionaries cannot carry
                # validation information, so skip them instead of failing.
                validations = [
                    variant.get(vkey) for variant in data.get("variants", [])
                    if isinstance(variant, dict)
                ]
                validations = [v for v in validations if v]
                # No per-variant validation: use the top level one if present.
                if not validations and vkey in data:
                    validations = [data.get(vkey)]
                for validate in validations:
                    if not validate:
                        continue
                    validate["grading_summary"] = out_csv
                    if validate.get("grading"):
                        for row in _get_validate_plotdata_yaml(
                                validate["grading"], data):
                            writer.writerow(row)
                            plot_data.append(row)
                    elif (validate.get("summary")
                          and validate.get("summary") != "None"):
                        if isinstance(validate["summary"], (list, tuple)):
                            plot_files.extend(
                                list(set(validate["summary"])))
                        else:
                            plot_files.append(validate["summary"])
        # Plotting happens after the CSV handle has been closed.
        if plot_files:
            plots = validateplot.classifyplot_from_plotfiles(
                plot_files, out_csv)
        elif plot_data:
            # plot_data is only filled while iterating `ordered`, so the list
            # is non-empty here; use the configuration of its last sample.
            plots = validateplot.create(plot_data, header, 0,
                                        ordered[-1]["config"],
                                        os.path.splitext(out_csv)[0])
        else:
            plots = []
        for data in vitems:
            if data.get(vkey):
                data[vkey]["grading_plots"] = plots
            for variant in data.get("variants", []):
                if isinstance(variant, dict) and variant.get(vkey):
                    variant[vkey]["grading_plots"] = plots
            out.append([data])
    return out
Example #2
0
def summarize_grading(samples, vkey="validate"):
    """Summarize grading results for every sample, one CSV per validation group.

    Works for traditional pipelines, where validation lives on each variant,
    and for CWL pipelines, where validation sits at the top level of a sample.

    Args:
        samples: Nested sample inputs; flattened with ``utils.flatten``.
        vkey: Key under which validation information is stored.

    Returns:
        A list of single-sample lists. Without grading information every
        flattened sample is returned wrapped in its own list; otherwise the
        list from ``_group_validate_samples`` extended with ``[sample]`` for
        each grouped sample.
    """
    samples = list(utils.flatten(samples))
    if not _has_grading_info(samples, vkey):
        return [[d] for d in samples]

    work_dir = samples[0]["dirs"]["work"]
    validate_dir = utils.safe_makedir(os.path.join(work_dir, vkey))
    header = ["sample", "caller", "variant.type", "category", "value"]
    _summarize_combined(samples, vkey)
    group_keys = (["metadata", "validate_batch"], ["metadata", "batch"], ["description"])
    validated, out = _group_validate_samples(samples, vkey, group_keys)

    def _sort_name(item):
        # Lane if present, otherwise the sample name; "" when neither is set.
        return item.get("lane", dd.get_sample_name(item)) or ""

    for group_name, group_items in validated.items():
        out_csv = os.path.join(validate_dir, "grading-summary-%s.csv" % group_name)
        with open(out_csv, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(header)
            plot_data = []
            plot_files = []
            ordered = sorted(group_items, key=_sort_name)
            for sample in ordered:
                per_variant = (entry.get(vkey) for entry in sample.get("variants", [])
                               if isinstance(entry, dict))
                validations = [v for v in per_variant if v]
                # Fall back to validation stored at the top level of the sample.
                if not validations and vkey in sample:
                    validations = [sample.get(vkey)]
                for validate in validations:
                    if not validate:
                        continue
                    validate["grading_summary"] = out_csv
                    if validate.get("grading"):
                        for row in _get_validate_plotdata_yaml(validate["grading"], sample):
                            writer.writerow(row)
                            plot_data.append(row)
                        continue
                    summary = validate.get("summary")
                    if summary and summary != "None":
                        if isinstance(summary, (list, tuple)):
                            plot_files.extend(list(set(summary)))
                        else:
                            plot_files.append(summary)

        # The CSV handle is closed at this point; build plots for the group.
        if plot_files:
            plots = validateplot.classifyplot_from_plotfiles(plot_files, out_csv)
        elif plot_data:
            # Rows only exist if at least one sample was visited; the last
            # visited sample supplies the configuration.
            plot_base = os.path.splitext(out_csv)[0]
            plots = validateplot.create(plot_data, header, 0, ordered[-1]["config"],
                                        plot_base)
        else:
            plots = []

        for sample in group_items:
            if sample.get(vkey):
                sample[vkey]["grading_plots"] = plots
            for entry in sample.get("variants", []):
                if isinstance(entry, dict) and entry.get(vkey):
                    entry[vkey]["grading_plots"] = plots
            out.append([sample])
    return out
Example #3
0
def summarize_grading(samples):
    """Provide summaries of grading results across all samples.

    For every group returned by ``_group_validate_samples`` a
    ``grading-summary-<group>.csv`` file is written below ``<work>/validate``.
    Each validated variant gets the CSV path stored as
    ``validate["grading_summary"]`` and the plots as
    ``validate["grading_plots"]``.

    Args:
        samples: Sample inputs; each is unpacked with
            ``utils.to_single_data``.

    Returns:
        A list of single-sample lists. Without grading information every
        sample is returned wrapped in its own list; otherwise the list from
        ``_group_validate_samples`` extended with ``[data]`` for each grouped
        sample.
    """
    samples = [utils.to_single_data(d) for d in samples]
    if not _has_grading_info(samples):
        return [[d] for d in samples]
    validate_dir = utils.safe_makedir(
        os.path.join(samples[0]["dirs"]["work"], "validate"))
    header = ["sample", "caller", "variant.type", "category", "value"]
    validated, out = _group_validate_samples(samples)
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for vname, vitems in validated.items():
        out_csv = os.path.join(validate_dir, "grading-summary-%s.csv" % vname)
        # Fall back to "" so a missing lane/sample name (None) cannot make the
        # sort fail when compared against string names.
        ordered = sorted(
            vitems,
            key=lambda x: x.get("lane", dd.get_sample_name(x)) or "")
        plot_data = []
        plot_files = []
        with open(out_csv, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(header)
            for data in ordered:
                for variant in data.get("variants", []):
                    if not variant.get("validate"):
                        continue
                    variant["validate"]["grading_summary"] = out_csv
                    if tz.get_in(["validate", "grading"], variant):
                        for row in _get_validate_plotdata_yaml(
                                variant, data):
                            writer.writerow(row)
                            plot_data.append(row)
                    elif tz.get_in(["validate", "summary"], variant):
                        plot_files.append(variant["validate"]["summary"])
        # Plotting happens after the CSV handle has been closed.
        if plot_files:
            plots = validateplot.classifyplot_from_plotfiles(
                plot_files, out_csv)
        elif plot_data:
            # plot_data is only filled while iterating `ordered`, so the list
            # is non-empty here; use the configuration of its last sample.
            plots = validateplot.create(plot_data, header, 0,
                                        ordered[-1]["config"],
                                        os.path.splitext(out_csv)[0])
        else:
            plots = None
        for data in vitems:
            for variant in data.get("variants", []):
                if variant.get("validate"):
                    variant["validate"]["grading_plots"] = plots
            out.append([data])
    return out
def summarize_grading(samples):
    """Provide summaries of grading results across all samples.

    Writes a single ``grading-summary.csv`` below ``<work>/validate`` holding
    the flattened grading statistics of every validated variant. Plots are
    created per sample from that sample's rows; the CSV path and plots are
    stored on each validated variant as ``validate["grading_summary"]`` and
    ``validate["grading_plots"]``.

    Args:
        samples: List of sample groups; the first item of each group is the
            sample dictionary that gets processed. Every validated variant
            must provide a ``grading`` YAML file path.

    Returns:
        ``samples`` unchanged when no grading information is present,
        otherwise a list with ``[data]`` for each processed sample.
    """
    if not _has_grading_info(samples):
        return samples
    validate_dir = utils.safe_makedir(
        os.path.join(samples[0][0]["dirs"]["work"], "validate"))
    out_csv = os.path.join(validate_dir, "grading-summary.csv")
    header = ["sample", "caller", "variant.type", "category", "value"]
    out = []
    with open(out_csv, "w") as out_handle:
        writer = csv.writer(out_handle)
        writer.writerow(header)
        # Index passed to the plotting code; advanced once per plotted sample.
        plot_num = 0
        for data in (x[0] for x in samples):
            plot_data = []
            for variant in data.get("variants", []):
                if not variant.get("validate"):
                    continue
                variant["validate"]["grading_summary"] = out_csv
                with open(variant["validate"]["grading"]) as in_handle:
                    # safe_load instead of load: the grading file is plain
                    # data, and yaml.load without an explicit Loader can
                    # construct arbitrary objects and is rejected by recent
                    # PyYAML releases.
                    grade_stats = yaml.safe_load(in_handle)
                for sample_stats in grade_stats:
                    sample = sample_stats["sample"]
                    for vtype, cat, val in _flatten_grading(sample_stats):
                        row = [
                            sample,
                            variant.get("variantcaller", ""), vtype, cat,
                            val
                        ]
                        writer.writerow(row)
                        plot_data.append(row)
            plots = None
            if plot_data:
                plots = validateplot.create(
                    plot_data, header, plot_num, data["config"],
                    os.path.splitext(out_csv)[0])
            if plots:
                plot_num += 1
                for variant in data.get("variants", []):
                    if variant.get("validate"):
                        variant["validate"]["grading_plots"] = plots
            out.append([data])
    return out
Example #5
0
def summarize_grading(samples):
    """Provide summaries of grading results across all samples.

    Samples are grouped by ``_group_validate_samples``; each group gets its
    own ``grading-summary-<group>.csv`` below ``<work>/validate`` plus a set
    of plots. The CSV path and plots are recorded on every validated variant
    under ``validate["grading_summary"]`` and ``validate["grading_plots"]``.

    Args:
        samples: Sample inputs; each is unpacked with
            ``utils.to_single_data``.

    Returns:
        A list of single-sample lists. Without grading information every
        sample is returned wrapped in its own list; otherwise the list from
        ``_group_validate_samples`` extended with ``[data]`` for each grouped
        sample.
    """
    samples = [utils.to_single_data(d) for d in samples]
    if not _has_grading_info(samples):
        return [[d] for d in samples]
    validate_dir = utils.safe_makedir(os.path.join(samples[0]["dirs"]["work"], "validate"))
    header = ["sample", "caller", "variant.type", "category", "value"]
    validated, out = _group_validate_samples(samples)
    # dict.iteritems() is gone in Python 3; items() works on both versions.
    for vname, vitems in validated.items():
        out_csv = os.path.join(validate_dir, "grading-summary-%s.csv" % vname)
        # A None lane/sample name cannot be ordered against strings on
        # Python 3, so substitute "" for it.
        ordered = sorted(vitems, key=lambda x: x.get("lane", dd.get_sample_name(x)) or "")
        plot_data = []
        plot_files = []
        with open(out_csv, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(header)
            for data in ordered:
                for variant in data.get("variants", []):
                    if not variant.get("validate"):
                        continue
                    variant["validate"]["grading_summary"] = out_csv
                    if tz.get_in(["validate", "grading"], variant):
                        for row in _get_validate_plotdata_yaml(variant, data):
                            writer.writerow(row)
                            plot_data.append(row)
                    elif tz.get_in(["validate", "summary"], variant):
                        plot_files.append(variant["validate"]["summary"])
        # The CSV handle is closed before any plotting takes place.
        if plot_files:
            plots = validateplot.classifyplot_from_plotfiles(plot_files, out_csv)
        elif plot_data:
            # Rows exist only if `ordered` is non-empty; its last sample
            # provides the configuration for plotting.
            plots = validateplot.create(plot_data, header, 0, ordered[-1]["config"],
                                        os.path.splitext(out_csv)[0])
        else:
            plots = None
        for data in vitems:
            for variant in data.get("variants", []):
                if variant.get("validate"):
                    variant["validate"]["grading_plots"] = plots
            out.append([data])
    return out
Example #6
0
def summarize_grading(samples):
    """Provide summaries of grading results across all samples.

    Samples are grouped by ``_group_validate_samples``; for each group a
    ``grading-summary-<group>.csv`` is written below ``<work>/validate`` with
    the flattened grading statistics of every validated variant, and plots
    are created from those rows. The CSV path and plots are stored on each
    validated variant as ``validate["grading_summary"]`` and
    ``validate["grading_plots"]``.

    Args:
        samples: List of sample groups; the work directory is read from the
            first item of the first group. Every validated variant must
            provide a ``grading`` YAML file path.

    Returns:
        ``samples`` unchanged when no grading information is present,
        otherwise the list from ``_group_validate_samples`` extended with
        ``[data]`` for each grouped sample.
    """
    if not _has_grading_info(samples):
        return samples
    validate_dir = utils.safe_makedir(os.path.join(samples[0][0]["dirs"]["work"], "validate"))
    header = ["sample", "caller", "variant.type", "category", "value"]
    validated, out = _group_validate_samples(samples)
    # dict.iteritems() is gone in Python 3; items() works on both versions.
    for vname, vitems in validated.items():
        out_csv = os.path.join(validate_dir, "grading-summary-%s.csv" % vname)
        with open(out_csv, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(header)
            plot_data = []
            for data in vitems:
                for variant in data.get("variants", []):
                    if not variant.get("validate"):
                        continue
                    variant["validate"]["grading_summary"] = out_csv
                    with open(variant["validate"]["grading"]) as in_handle:
                        # safe_load instead of load: the grading file is plain
                        # data, and yaml.load without an explicit Loader can
                        # construct arbitrary objects and is rejected by
                        # recent PyYAML releases.
                        grade_stats = yaml.safe_load(in_handle)
                    for sample_stats in grade_stats:
                        sample = sample_stats["sample"]
                        for vtype, cat, val in _flatten_grading(sample_stats):
                            row = [sample, variant.get("variantcaller", ""), vtype, cat, val]
                            writer.writerow(row)
                            plot_data.append(row)
            # `data` is the last sample of the group; rows can only exist if
            # the loop above ran at least once.
            if plot_data:
                plots = validateplot.create(plot_data, header, 0, data["config"],
                                            os.path.splitext(out_csv)[0])
            else:
                plots = None
            for data in vitems:
                for variant in data.get("variants", []):
                    if variant.get("validate"):
                        variant["validate"]["grading_plots"] = plots
                out.append([data])
    return out